Diffstat (limited to 'arch/x86')
163 files changed, 9444 insertions, 10839 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b198c018efc4..153aa6f78299 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -20,6 +20,7 @@ config X86
20 | select HAVE_UNSTABLE_SCHED_CLOCK | 20 | select HAVE_UNSTABLE_SCHED_CLOCK |
21 | select HAVE_IDE | 21 | select HAVE_IDE |
22 | select HAVE_OPROFILE | 22 | select HAVE_OPROFILE |
23 | select HAVE_PCSPKR_PLATFORM | ||
23 | select HAVE_PERF_EVENTS | 24 | select HAVE_PERF_EVENTS |
24 | select HAVE_IRQ_WORK | 25 | select HAVE_IRQ_WORK |
25 | select HAVE_IOREMAP_PROT | 26 | select HAVE_IOREMAP_PROT |
@@ -70,6 +71,7 @@ config X86
70 | select IRQ_FORCED_THREADING | 71 | select IRQ_FORCED_THREADING |
71 | select USE_GENERIC_SMP_HELPERS if SMP | 72 | select USE_GENERIC_SMP_HELPERS if SMP |
72 | select HAVE_BPF_JIT if (X86_64 && NET) | 73 | select HAVE_BPF_JIT if (X86_64 && NET) |
74 | select CLKEVT_I8253 | ||
73 | 75 | ||
74 | config INSTRUCTION_DECODER | 76 | config INSTRUCTION_DECODER |
75 | def_bool (KPROBES || PERF_EVENTS) | 77 | def_bool (KPROBES || PERF_EVENTS) |
@@ -93,6 +95,10 @@ config CLOCKSOURCE_WATCHDOG
93 | config GENERIC_CLOCKEVENTS | 95 | config GENERIC_CLOCKEVENTS |
94 | def_bool y | 96 | def_bool y |
95 | 97 | ||
98 | config ARCH_CLOCKSOURCE_DATA | ||
99 | def_bool y | ||
100 | depends on X86_64 | ||
101 | |||
96 | config GENERIC_CLOCKEVENTS_BROADCAST | 102 | config GENERIC_CLOCKEVENTS_BROADCAST |
97 | def_bool y | 103 | def_bool y |
98 | depends on X86_64 || (X86_32 && X86_LOCAL_APIC) | 104 | depends on X86_64 || (X86_32 && X86_LOCAL_APIC) |
@@ -384,12 +390,21 @@ config X86_INTEL_CE
384 | This option compiles in support for the CE4100 SOC for settop | 390 | This option compiles in support for the CE4100 SOC for settop |
385 | boxes and media devices. | 391 | boxes and media devices. |
386 | 392 | ||
393 | config X86_INTEL_MID | ||
394 | bool "Intel MID platform support" | ||
395 | depends on X86_32 | ||
396 | depends on X86_EXTENDED_PLATFORM | ||
397 | ---help--- | ||
398 | Select to build a kernel capable of supporting Intel MID platform | ||
399 | systems which do not have the PCI legacy interfaces (Moorestown, | ||
400 | Medfield). If you are building for a PC class system say N here. | ||
401 | |||
402 | if X86_INTEL_MID | ||
403 | |||
387 | config X86_MRST | 404 | config X86_MRST |
388 | bool "Moorestown MID platform" | 405 | bool "Moorestown MID platform" |
389 | depends on PCI | 406 | depends on PCI |
390 | depends on PCI_GOANY | 407 | depends on PCI_GOANY |
391 | depends on X86_32 | ||
392 | depends on X86_EXTENDED_PLATFORM | ||
393 | depends on X86_IO_APIC | 408 | depends on X86_IO_APIC |
394 | select APB_TIMER | 409 | select APB_TIMER |
395 | select I2C | 410 | select I2C |
@@ -404,6 +419,8 @@ config X86_MRST
404 | nor standard legacy replacement devices/features. e.g. Moorestown does | 419 | nor standard legacy replacement devices/features. e.g. Moorestown does |
405 | not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. | 420 | not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. |
406 | 421 | ||
422 | endif | ||
423 | |||
407 | config X86_RDC321X | 424 | config X86_RDC321X |
408 | bool "RDC R-321x SoC" | 425 | bool "RDC R-321x SoC" |
409 | depends on X86_32 | 426 | depends on X86_32 |
@@ -512,6 +529,18 @@ menuconfig PARAVIRT_GUEST
512 | 529 | ||
513 | if PARAVIRT_GUEST | 530 | if PARAVIRT_GUEST |
514 | 531 | ||
532 | config PARAVIRT_TIME_ACCOUNTING | ||
533 | bool "Paravirtual steal time accounting" | ||
534 | select PARAVIRT | ||
535 | default n | ||
536 | ---help--- | ||
537 | Select this option to enable fine granularity task steal time | ||
538 | accounting. Time spent executing other tasks in parallel with | ||
539 | the current vCPU is discounted from the vCPU power. To account for | ||
540 | that, there can be a small performance impact. | ||
541 | |||
542 | If in doubt, say N here. | ||
543 | |||
515 | source "arch/x86/xen/Kconfig" | 544 | source "arch/x86/xen/Kconfig" |
516 | 545 | ||
517 | config KVM_CLOCK | 546 | config KVM_CLOCK |
@@ -617,6 +646,7 @@ config HPET_EMULATE_RTC
617 | config APB_TIMER | 646 | config APB_TIMER |
618 | def_bool y if MRST | 647 | def_bool y if MRST |
619 | prompt "Langwell APB Timer Support" if X86_MRST | 648 | prompt "Langwell APB Timer Support" if X86_MRST |
649 | select DW_APB_TIMER | ||
620 | help | 650 | help |
621 | APB timer is the replacement for 8254, HPET on X86 MID platforms. | 651 | APB timer is the replacement for 8254, HPET on X86 MID platforms. |
622 | The APBT provides a stable time base on SMP | 652 | The APBT provides a stable time base on SMP |
@@ -680,33 +710,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
680 | Calgary anyway, pass 'iommu=calgary' on the kernel command line. | 710 | Calgary anyway, pass 'iommu=calgary' on the kernel command line. |
681 | If unsure, say Y. | 711 | If unsure, say Y. |
682 | 712 | ||
683 | config AMD_IOMMU | ||
684 | bool "AMD IOMMU support" | ||
685 | select SWIOTLB | ||
686 | select PCI_MSI | ||
687 | select PCI_IOV | ||
688 | depends on X86_64 && PCI && ACPI | ||
689 | ---help--- | ||
690 | With this option you can enable support for AMD IOMMU hardware in | ||
691 | your system. An IOMMU is a hardware component which provides | ||
692 | remapping of DMA memory accesses from devices. With an AMD IOMMU you | ||
693 | can isolate the the DMA memory of different devices and protect the | ||
694 | system from misbehaving device drivers or hardware. | ||
695 | |||
696 | You can find out if your system has an AMD IOMMU if you look into | ||
697 | your BIOS for an option to enable it or if you have an IVRS ACPI | ||
698 | table. | ||
699 | |||
700 | config AMD_IOMMU_STATS | ||
701 | bool "Export AMD IOMMU statistics to debugfs" | ||
702 | depends on AMD_IOMMU | ||
703 | select DEBUG_FS | ||
704 | ---help--- | ||
705 | This option enables code in the AMD IOMMU driver to collect various | ||
706 | statistics about whats happening in the driver and exports that | ||
707 | information to userspace via debugfs. | ||
708 | If unsure, say N. | ||
709 | |||
710 | # need this always selected by IOMMU for the VIA workaround | 713 | # need this always selected by IOMMU for the VIA workaround |
711 | config SWIOTLB | 714 | config SWIOTLB |
712 | def_bool y if X86_64 | 715 | def_bool y if X86_64 |
@@ -720,9 +723,6 @@ config SWIOTLB
720 | config IOMMU_HELPER | 723 | config IOMMU_HELPER |
721 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) | 724 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) |
722 | 725 | ||
723 | config IOMMU_API | ||
724 | def_bool (AMD_IOMMU || DMAR) | ||
725 | |||
726 | config MAXSMP | 726 | config MAXSMP |
727 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | 727 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" |
728 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL | 728 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
@@ -1170,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
1170 | config AMD_NUMA | 1170 | config AMD_NUMA |
1171 | def_bool y | 1171 | def_bool y |
1172 | prompt "Old style AMD Opteron NUMA detection" | 1172 | prompt "Old style AMD Opteron NUMA detection" |
1173 | depends on NUMA && PCI | 1173 | depends on X86_64 && NUMA && PCI |
1174 | ---help--- | 1174 | ---help--- |
1175 | Enable AMD NUMA node topology detection. You should say Y here if | 1175 | Enable AMD NUMA node topology detection. You should say Y here if |
1176 | you have a multi processor AMD system. This uses an old method to | 1176 | you have a multi processor AMD system. This uses an old method to |
@@ -1737,8 +1737,8 @@ menuconfig APM
1737 | machines with more than one CPU. | 1737 | machines with more than one CPU. |
1738 | 1738 | ||
1739 | In order to use APM, you will need supporting software. For location | 1739 | In order to use APM, you will need supporting software. For location |
1740 | and more information, read <file:Documentation/power/pm.txt> and the | 1740 | and more information, read <file:Documentation/power/apm-acpi.txt> |
1741 | Battery Powered Linux mini-HOWTO, available from | 1741 | and the Battery Powered Linux mini-HOWTO, available from |
1742 | <http://www.tldp.org/docs.html#howto>. | 1742 | <http://www.tldp.org/docs.html#howto>. |
1743 | 1743 | ||
1744 | This driver does not spin down disk drives (see the hdparm(8) | 1744 | This driver does not spin down disk drives (see the hdparm(8) |
@@ -1942,55 +1942,6 @@ config PCI_CNB20LE_QUIRK
1942 | 1942 | ||
1943 | You should say N unless you know you need this. | 1943 | You should say N unless you know you need this. |
1944 | 1944 | ||
1945 | config DMAR | ||
1946 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | ||
1947 | depends on PCI_MSI && ACPI && EXPERIMENTAL | ||
1948 | help | ||
1949 | DMA remapping (DMAR) devices support enables independent address | ||
1950 | translations for Direct Memory Access (DMA) from devices. | ||
1951 | These DMA remapping devices are reported via ACPI tables | ||
1952 | and include PCI device scope covered by these DMA | ||
1953 | remapping devices. | ||
1954 | |||
1955 | config DMAR_DEFAULT_ON | ||
1956 | def_bool y | ||
1957 | prompt "Enable DMA Remapping Devices by default" | ||
1958 | depends on DMAR | ||
1959 | help | ||
1960 | Selecting this option will enable a DMAR device at boot time if | ||
1961 | one is found. If this option is not selected, DMAR support can | ||
1962 | be enabled by passing intel_iommu=on to the kernel. It is | ||
1963 | recommended you say N here while the DMAR code remains | ||
1964 | experimental. | ||
1965 | |||
1966 | config DMAR_BROKEN_GFX_WA | ||
1967 | bool "Workaround broken graphics drivers (going away soon)" | ||
1968 | depends on DMAR && BROKEN | ||
1969 | ---help--- | ||
1970 | Current Graphics drivers tend to use physical address | ||
1971 | for DMA and avoid using DMA APIs. Setting this config | ||
1972 | option permits the IOMMU driver to set a unity map for | ||
1973 | all the OS-visible memory. Hence the driver can continue | ||
1974 | to use physical addresses for DMA, at least until this | ||
1975 | option is removed in the 2.6.32 kernel. | ||
1976 | |||
1977 | config DMAR_FLOPPY_WA | ||
1978 | def_bool y | ||
1979 | depends on DMAR | ||
1980 | ---help--- | ||
1981 | Floppy disk drivers are known to bypass DMA API calls | ||
1982 | thereby failing to work when IOMMU is enabled. This | ||
1983 | workaround will setup a 1:1 mapping for the first | ||
1984 | 16MiB to make floppy (an ISA device) work. | ||
1985 | |||
1986 | config INTR_REMAP | ||
1987 | bool "Support for Interrupt Remapping (EXPERIMENTAL)" | ||
1988 | depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL | ||
1989 | ---help--- | ||
1990 | Supports Interrupt remapping for IO-APIC and MSI devices. | ||
1991 | To use x2apic mode in the CPU's which support x2APIC enhancements or | ||
1992 | to support platforms with CPU's having > 8 bit APIC ID, say Y. | ||
1993 | |||
1994 | source "drivers/pci/pcie/Kconfig" | 1945 | source "drivers/pci/pcie/Kconfig" |
1995 | 1946 | ||
1996 | source "drivers/pci/Kconfig" | 1947 | source "drivers/pci/Kconfig" |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6a7cfdf8ff69..e3ca7e0d858c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -312,6 +312,9 @@ config X86_CMPXCHG
312 | config CMPXCHG_LOCAL | 312 | config CMPXCHG_LOCAL |
313 | def_bool X86_64 || (X86_32 && !M386) | 313 | def_bool X86_64 || (X86_32 && !M386) |
314 | 314 | ||
315 | config CMPXCHG_DOUBLE | ||
316 | def_bool y | ||
317 | |||
315 | config X86_L1_CACHE_SHIFT | 318 | config X86_L1_CACHE_SHIFT |
316 | int | 319 | int |
317 | default "7" if MPENTIUM4 || MPSC | 320 | default "7" if MPENTIUM4 || MPSC |
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f7cb086b4add..95365a82b6a0 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -9,12 +9,6 @@
9 | # Changed by many, many contributors over the years. | 9 | # Changed by many, many contributors over the years. |
10 | # | 10 | # |
11 | 11 | ||
12 | # ROOT_DEV specifies the default root-device when making the image. | ||
13 | # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case | ||
14 | # the default of FLOPPY is used by 'build'. | ||
15 | |||
16 | ROOT_DEV := CURRENT | ||
17 | |||
18 | # If you want to preset the SVGA mode, uncomment the next line and | 12 | # If you want to preset the SVGA mode, uncomment the next line and |
19 | # set SVGA_MODE to whatever number you want. | 13 | # set SVGA_MODE to whatever number you want. |
20 | # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. | 14 | # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. |
@@ -75,8 +69,7 @@ GCOV_PROFILE := n
75 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) | 69 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) |
76 | 70 | ||
77 | quiet_cmd_image = BUILD $@ | 71 | quiet_cmd_image = BUILD $@ |
78 | cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ | 72 | cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@ |
79 | $(ROOT_DEV) > $@ | ||
80 | 73 | ||
81 | $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE | 74 | $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE |
82 | $(call if_changed,image) | 75 | $(call if_changed,image) |
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index ee3a4ea923ac..fdc60a0b3c20 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
130 | 130 | ||
131 | static void usage(void) | 131 | static void usage(void) |
132 | { | 132 | { |
133 | die("Usage: build setup system [rootdev] [> image]"); | 133 | die("Usage: build setup system [> image]"); |
134 | } | 134 | } |
135 | 135 | ||
136 | int main(int argc, char ** argv) | 136 | int main(int argc, char ** argv) |
@@ -138,39 +138,14 @@ int main(int argc, char ** argv)
138 | unsigned int i, sz, setup_sectors; | 138 | unsigned int i, sz, setup_sectors; |
139 | int c; | 139 | int c; |
140 | u32 sys_size; | 140 | u32 sys_size; |
141 | u8 major_root, minor_root; | ||
142 | struct stat sb; | 141 | struct stat sb; |
143 | FILE *file; | 142 | FILE *file; |
144 | int fd; | 143 | int fd; |
145 | void *kernel; | 144 | void *kernel; |
146 | u32 crc = 0xffffffffUL; | 145 | u32 crc = 0xffffffffUL; |
147 | 146 | ||
148 | if ((argc < 3) || (argc > 4)) | 147 | if (argc != 3) |
149 | usage(); | 148 | usage(); |
150 | if (argc > 3) { | ||
151 | if (!strcmp(argv[3], "CURRENT")) { | ||
152 | if (stat("/", &sb)) { | ||
153 | perror("/"); | ||
154 | die("Couldn't stat /"); | ||
155 | } | ||
156 | major_root = major(sb.st_dev); | ||
157 | minor_root = minor(sb.st_dev); | ||
158 | } else if (strcmp(argv[3], "FLOPPY")) { | ||
159 | if (stat(argv[3], &sb)) { | ||
160 | perror(argv[3]); | ||
161 | die("Couldn't stat root device."); | ||
162 | } | ||
163 | major_root = major(sb.st_rdev); | ||
164 | minor_root = minor(sb.st_rdev); | ||
165 | } else { | ||
166 | major_root = 0; | ||
167 | minor_root = 0; | ||
168 | } | ||
169 | } else { | ||
170 | major_root = DEFAULT_MAJOR_ROOT; | ||
171 | minor_root = DEFAULT_MINOR_ROOT; | ||
172 | } | ||
173 | fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root); | ||
174 | 149 | ||
175 | /* Copy the setup code */ | 150 | /* Copy the setup code */ |
176 | file = fopen(argv[1], "r"); | 151 | file = fopen(argv[1], "r"); |
@@ -193,8 +168,8 @@ int main(int argc, char ** argv)
193 | memset(buf+c, 0, i-c); | 168 | memset(buf+c, 0, i-c); |
194 | 169 | ||
195 | /* Set the default root device */ | 170 | /* Set the default root device */ |
196 | buf[508] = minor_root; | 171 | buf[508] = DEFAULT_MINOR_ROOT; |
197 | buf[509] = major_root; | 172 | buf[509] = DEFAULT_MAJOR_ROOT; |
198 | 173 | ||
199 | fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); | 174 | fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); |
200 | 175 | ||
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 7a6e68e4f748..976aa64d9a20 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -245,7 +245,7 @@ static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
245 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) | 245 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) |
246 | & CRYPTO_TFM_RES_MASK); | 246 | & CRYPTO_TFM_RES_MASK); |
247 | 247 | ||
248 | return 0; | 248 | return err; |
249 | } | 249 | } |
250 | 250 | ||
251 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) | 251 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 588a7aa937e1..65577698cab2 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -127,15 +127,17 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
127 | 127 | ||
128 | asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) | 128 | asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) |
129 | { | 129 | { |
130 | mask &= _BLOCKABLE; | 130 | sigset_t blocked; |
131 | spin_lock_irq(¤t->sighand->siglock); | 131 | |
132 | current->saved_sigmask = current->blocked; | 132 | current->saved_sigmask = current->blocked; |
133 | siginitset(¤t->blocked, mask); | 133 | |
134 | recalc_sigpending(); | 134 | mask &= _BLOCKABLE; |
135 | spin_unlock_irq(¤t->sighand->siglock); | 135 | siginitset(&blocked, mask); |
136 | set_current_blocked(&blocked); | ||
136 | 137 | ||
137 | current->state = TASK_INTERRUPTIBLE; | 138 | current->state = TASK_INTERRUPTIBLE; |
138 | schedule(); | 139 | schedule(); |
140 | |||
139 | set_restore_sigmask(); | 141 | set_restore_sigmask(); |
140 | return -ERESTARTNOHAND; | 142 | return -ERESTARTNOHAND; |
141 | } | 143 | } |
@@ -279,10 +281,7 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
279 | goto badframe; | 281 | goto badframe; |
280 | 282 | ||
281 | sigdelsetmask(&set, ~_BLOCKABLE); | 283 | sigdelsetmask(&set, ~_BLOCKABLE); |
282 | spin_lock_irq(¤t->sighand->siglock); | 284 | set_current_blocked(&set); |
283 | current->blocked = set; | ||
284 | recalc_sigpending(); | ||
285 | spin_unlock_irq(¤t->sighand->siglock); | ||
286 | 285 | ||
287 | if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) | 286 | if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) |
288 | goto badframe; | 287 | goto badframe; |
@@ -308,10 +307,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
308 | goto badframe; | 307 | goto badframe; |
309 | 308 | ||
310 | sigdelsetmask(&set, ~_BLOCKABLE); | 309 | sigdelsetmask(&set, ~_BLOCKABLE); |
311 | spin_lock_irq(¤t->sighand->siglock); | 310 | set_current_blocked(&set); |
312 | current->blocked = set; | ||
313 | recalc_sigpending(); | ||
314 | spin_unlock_irq(¤t->sighand->siglock); | ||
315 | 311 | ||
316 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 312 | if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
317 | goto badframe; | 313 | goto badframe; |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index c1870dddd322..a0e866d233ee 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -143,7 +143,7 @@ ENTRY(ia32_sysenter_target)
143 | CFI_REL_OFFSET rip,0 | 143 | CFI_REL_OFFSET rip,0 |
144 | pushq_cfi %rax | 144 | pushq_cfi %rax |
145 | cld | 145 | cld |
146 | SAVE_ARGS 0,0,1 | 146 | SAVE_ARGS 0,1,0 |
147 | /* no need to do an access_ok check here because rbp has been | 147 | /* no need to do an access_ok check here because rbp has been |
148 | 32bit zero extended */ | 148 | 32bit zero extended */ |
149 | 1: movl (%rbp),%ebp | 149 | 1: movl (%rbp),%ebp |
@@ -173,7 +173,7 @@ sysexit_from_sys_call:
173 | andl $~0x200,EFLAGS-R11(%rsp) | 173 | andl $~0x200,EFLAGS-R11(%rsp) |
174 | movl RIP-R11(%rsp),%edx /* User %eip */ | 174 | movl RIP-R11(%rsp),%edx /* User %eip */ |
175 | CFI_REGISTER rip,rdx | 175 | CFI_REGISTER rip,rdx |
176 | RESTORE_ARGS 1,24,1,1,1,1 | 176 | RESTORE_ARGS 0,24,0,0,0,0 |
177 | xorq %r8,%r8 | 177 | xorq %r8,%r8 |
178 | xorq %r9,%r9 | 178 | xorq %r9,%r9 |
179 | xorq %r10,%r10 | 179 | xorq %r10,%r10 |
@@ -289,7 +289,7 @@ ENTRY(ia32_cstar_target)
289 | * disabled irqs and here we enable it straight after entry: | 289 | * disabled irqs and here we enable it straight after entry: |
290 | */ | 290 | */ |
291 | ENABLE_INTERRUPTS(CLBR_NONE) | 291 | ENABLE_INTERRUPTS(CLBR_NONE) |
292 | SAVE_ARGS 8,1,1 | 292 | SAVE_ARGS 8,0,0 |
293 | movl %eax,%eax /* zero extension */ | 293 | movl %eax,%eax /* zero extension */ |
294 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 294 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
295 | movq %rcx,RIP-ARGOFFSET(%rsp) | 295 | movq %rcx,RIP-ARGOFFSET(%rsp) |
@@ -328,7 +328,7 @@ cstar_dispatch:
328 | jnz sysretl_audit | 328 | jnz sysretl_audit |
329 | sysretl_from_sys_call: | 329 | sysretl_from_sys_call: |
330 | andl $~TS_COMPAT,TI_status(%r10) | 330 | andl $~TS_COMPAT,TI_status(%r10) |
331 | RESTORE_ARGS 1,-ARG_SKIP,1,1,1 | 331 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 |
332 | movl RIP-ARGOFFSET(%rsp),%ecx | 332 | movl RIP-ARGOFFSET(%rsp),%ecx |
333 | CFI_REGISTER rip,rcx | 333 | CFI_REGISTER rip,rcx |
334 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 334 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
@@ -419,7 +419,7 @@ ENTRY(ia32_syscall)
419 | cld | 419 | cld |
420 | /* note the registers are not zero extended to the sf. | 420 | /* note the registers are not zero extended to the sf. |
421 | this could be a problem. */ | 421 | this could be a problem. */ |
422 | SAVE_ARGS 0,0,1 | 422 | SAVE_ARGS 0,1,0 |
423 | GET_THREAD_INFO(%r10) | 423 | GET_THREAD_INFO(%r10) |
424 | orl $TS_COMPAT,TI_status(%r10) | 424 | orl $TS_COMPAT,TI_status(%r10) |
425 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | 425 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 94d420b360d1..4554cc6fb96a 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -17,8 +17,8 @@
17 | 17 | ||
18 | .macro altinstruction_entry orig alt feature orig_len alt_len | 18 | .macro altinstruction_entry orig alt feature orig_len alt_len |
19 | .align 8 | 19 | .align 8 |
20 | .quad \orig | 20 | .long \orig - . |
21 | .quad \alt | 21 | .long \alt - . |
22 | .word \feature | 22 | .word \feature |
23 | .byte \orig_len | 23 | .byte \orig_len |
24 | .byte \alt_len | 24 | .byte \alt_len |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bf535f947e8c..23fb6d79f209 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -43,8 +43,8 @@
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | struct alt_instr { | 45 | struct alt_instr { |
46 | u8 *instr; /* original instruction */ | 46 | s32 instr_offset; /* original instruction */ |
47 | u8 *replacement; | 47 | s32 repl_offset; /* offset to replacement instruction */ |
48 | u16 cpuid; /* cpuid bit set for replacement */ | 48 | u16 cpuid; /* cpuid bit set for replacement */ |
49 | u8 instrlen; /* length of original instruction */ | 49 | u8 instrlen; /* length of original instruction */ |
50 | u8 replacementlen; /* length of new instruction, <= instrlen */ | 50 | u8 replacementlen; /* length of new instruction, <= instrlen */ |
@@ -84,8 +84,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
84 | "661:\n\t" oldinstr "\n662:\n" \ | 84 | "661:\n\t" oldinstr "\n662:\n" \ |
85 | ".section .altinstructions,\"a\"\n" \ | 85 | ".section .altinstructions,\"a\"\n" \ |
86 | _ASM_ALIGN "\n" \ | 86 | _ASM_ALIGN "\n" \ |
87 | _ASM_PTR "661b\n" /* label */ \ | 87 | " .long 661b - .\n" /* label */ \ |
88 | _ASM_PTR "663f\n" /* new instruction */ \ | 88 | " .long 663f - .\n" /* new instruction */ \ |
89 | " .word " __stringify(feature) "\n" /* feature bit */ \ | 89 | " .word " __stringify(feature) "\n" /* feature bit */ \ |
90 | " .byte 662b-661b\n" /* sourcelen */ \ | 90 | " .byte 662b-661b\n" /* sourcelen */ \ |
91 | " .byte 664f-663f\n" /* replacementlen */ \ | 91 | " .byte 664f-663f\n" /* replacementlen */ \ |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
deleted file mode 100644
index a6863a2dec1f..000000000000
--- a/arch/x86/include/asm/amd_iommu.h
+++ /dev/null
@@ -1,35 +0,0 @@
1 | /* | ||
2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * Leo Duran <leo.duran@amd.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #ifndef _ASM_X86_AMD_IOMMU_H | ||
21 | #define _ASM_X86_AMD_IOMMU_H | ||
22 | |||
23 | #include <linux/irqreturn.h> | ||
24 | |||
25 | #ifdef CONFIG_AMD_IOMMU | ||
26 | |||
27 | extern int amd_iommu_detect(void); | ||
28 | |||
29 | #else | ||
30 | |||
31 | static inline int amd_iommu_detect(void) { return -ENODEV; } | ||
32 | |||
33 | #endif | ||
34 | |||
35 | #endif /* _ASM_X86_AMD_IOMMU_H */ | ||
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
deleted file mode 100644
index 55d95eb789b3..000000000000
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ /dev/null
@@ -1,54 +0,0 @@
1 | /* | ||
2 | * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License version 2 as published | ||
7 | * by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | |||
19 | #ifndef _ASM_X86_AMD_IOMMU_PROTO_H | ||
20 | #define _ASM_X86_AMD_IOMMU_PROTO_H | ||
21 | |||
22 | #include <asm/amd_iommu_types.h> | ||
23 | |||
24 | extern int amd_iommu_init_dma_ops(void); | ||
25 | extern int amd_iommu_init_passthrough(void); | ||
26 | extern irqreturn_t amd_iommu_int_thread(int irq, void *data); | ||
27 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | ||
28 | extern void amd_iommu_apply_erratum_63(u16 devid); | ||
29 | extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); | ||
30 | extern int amd_iommu_init_devices(void); | ||
31 | extern void amd_iommu_uninit_devices(void); | ||
32 | extern void amd_iommu_init_notifier(void); | ||
33 | extern void amd_iommu_init_api(void); | ||
34 | #ifndef CONFIG_AMD_IOMMU_STATS | ||
35 | |||
36 | static inline void amd_iommu_stats_init(void) { } | ||
37 | |||
38 | #endif /* !CONFIG_AMD_IOMMU_STATS */ | ||
39 | |||
40 | static inline bool is_rd890_iommu(struct pci_dev *pdev) | ||
41 | { | ||
42 | return (pdev->vendor == PCI_VENDOR_ID_ATI) && | ||
43 | (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); | ||
44 | } | ||
45 | |||
46 | static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) | ||
47 | { | ||
48 | if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) | ||
49 | return false; | ||
50 | |||
51 | return !!(iommu->features & f); | ||
52 | } | ||
53 | |||
54 | #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ | ||
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
deleted file mode 100644
index 4c9982995414..000000000000
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ /dev/null
@@ -1,580 +0,0 @@
1 | /* | ||
2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * Leo Duran <leo.duran@amd.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #ifndef _ASM_X86_AMD_IOMMU_TYPES_H | ||
21 | #define _ASM_X86_AMD_IOMMU_TYPES_H | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | #include <linux/mutex.h> | ||
25 | #include <linux/list.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | |||
28 | /* | ||
29 | * Maximum number of IOMMUs supported | ||
30 | */ | ||
31 | #define MAX_IOMMUS 32 | ||
32 | |||
33 | /* | ||
34 | * some size calculation constants | ||
35 | */ | ||
36 | #define DEV_TABLE_ENTRY_SIZE 32 | ||
37 | #define ALIAS_TABLE_ENTRY_SIZE 2 | ||
38 | #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) | ||
39 | |||
40 | /* Length of the MMIO region for the AMD IOMMU */ | ||
41 | #define MMIO_REGION_LENGTH 0x4000 | ||
42 | |||
43 | /* Capability offsets used by the driver */ | ||
44 | #define MMIO_CAP_HDR_OFFSET 0x00 | ||
45 | #define MMIO_RANGE_OFFSET 0x0c | ||
46 | #define MMIO_MISC_OFFSET 0x10 | ||
47 | |||
48 | /* Masks, shifts and macros to parse the device range capability */ | ||
49 | #define MMIO_RANGE_LD_MASK 0xff000000 | ||
50 | #define MMIO_RANGE_FD_MASK 0x00ff0000 | ||
51 | #define MMIO_RANGE_BUS_MASK 0x0000ff00 | ||
52 | #define MMIO_RANGE_LD_SHIFT 24 | ||
53 | #define MMIO_RANGE_FD_SHIFT 16 | ||
54 | #define MMIO_RANGE_BUS_SHIFT 8 | ||
55 | #define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) | ||
56 | #define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) | ||
57 | #define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) | ||
58 | #define MMIO_MSI_NUM(x) ((x) & 0x1f) | ||
59 | |||
60 | /* Flag masks for the AMD IOMMU exclusion range */ | ||
61 | #define MMIO_EXCL_ENABLE_MASK 0x01ULL | ||
62 | #define MMIO_EXCL_ALLOW_MASK 0x02ULL | ||
63 | |||
64 | /* Used offsets into the MMIO space */ | ||
65 | #define MMIO_DEV_TABLE_OFFSET 0x0000 | ||
66 | #define MMIO_CMD_BUF_OFFSET 0x0008 | ||
67 | #define MMIO_EVT_BUF_OFFSET 0x0010 | ||
68 | #define MMIO_CONTROL_OFFSET 0x0018 | ||
69 | #define MMIO_EXCL_BASE_OFFSET 0x0020 | ||
70 | #define MMIO_EXCL_LIMIT_OFFSET 0x0028 | ||
71 | #define MMIO_EXT_FEATURES 0x0030 | ||
72 | #define MMIO_CMD_HEAD_OFFSET 0x2000 | ||
73 | #define MMIO_CMD_TAIL_OFFSET 0x2008 | ||
74 | #define MMIO_EVT_HEAD_OFFSET 0x2010 | ||
75 | #define MMIO_EVT_TAIL_OFFSET 0x2018 | ||
76 | #define MMIO_STATUS_OFFSET 0x2020 | ||
77 | |||
78 | |||
79 | /* Extended Feature Bits */ | ||
80 | #define FEATURE_PREFETCH (1ULL<<0) | ||
81 | #define FEATURE_PPR (1ULL<<1) | ||
82 | #define FEATURE_X2APIC (1ULL<<2) | ||
83 | #define FEATURE_NX (1ULL<<3) | ||
84 | #define FEATURE_GT (1ULL<<4) | ||
85 | #define FEATURE_IA (1ULL<<6) | ||
86 | #define FEATURE_GA (1ULL<<7) | ||
87 | #define FEATURE_HE (1ULL<<8) | ||
88 | #define FEATURE_PC (1ULL<<9) | ||
89 | |||
90 | /* MMIO status bits */ | ||
91 | #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 | ||
92 | |||
93 | /* event logging constants */ | ||
94 | #define EVENT_ENTRY_SIZE 0x10 | ||
95 | #define EVENT_TYPE_SHIFT 28 | ||
96 | #define EVENT_TYPE_MASK 0xf | ||
97 | #define EVENT_TYPE_ILL_DEV 0x1 | ||
98 | #define EVENT_TYPE_IO_FAULT 0x2 | ||
99 | #define EVENT_TYPE_DEV_TAB_ERR 0x3 | ||
100 | #define EVENT_TYPE_PAGE_TAB_ERR 0x4 | ||
101 | #define EVENT_TYPE_ILL_CMD 0x5 | ||
102 | #define EVENT_TYPE_CMD_HARD_ERR 0x6 | ||
103 | #define EVENT_TYPE_IOTLB_INV_TO 0x7 | ||
104 | #define EVENT_TYPE_INV_DEV_REQ 0x8 | ||
105 | #define EVENT_DEVID_MASK 0xffff | ||
106 | #define EVENT_DEVID_SHIFT 0 | ||
107 | #define EVENT_DOMID_MASK 0xffff | ||
108 | #define EVENT_DOMID_SHIFT 0 | ||
109 | #define EVENT_FLAGS_MASK 0xfff | ||
110 | #define EVENT_FLAGS_SHIFT 0x10 | ||
111 | |||
112 | /* feature control bits */ | ||
113 | #define CONTROL_IOMMU_EN 0x00ULL | ||
114 | #define CONTROL_HT_TUN_EN 0x01ULL | ||
115 | #define CONTROL_EVT_LOG_EN 0x02ULL | ||
116 | #define CONTROL_EVT_INT_EN 0x03ULL | ||
117 | #define CONTROL_COMWAIT_EN 0x04ULL | ||
118 | #define CONTROL_PASSPW_EN 0x08ULL | ||
119 | #define CONTROL_RESPASSPW_EN 0x09ULL | ||
120 | #define CONTROL_COHERENT_EN 0x0aULL | ||
121 | #define CONTROL_ISOC_EN 0x0bULL | ||
122 | #define CONTROL_CMDBUF_EN 0x0cULL | ||
123 | #define CONTROL_PPFLOG_EN 0x0dULL | ||
124 | #define CONTROL_PPFINT_EN 0x0eULL | ||
125 | |||
126 | /* command specific defines */ | ||
127 | #define CMD_COMPL_WAIT 0x01 | ||
128 | #define CMD_INV_DEV_ENTRY 0x02 | ||
129 | #define CMD_INV_IOMMU_PAGES 0x03 | ||
130 | #define CMD_INV_IOTLB_PAGES 0x04 | ||
131 | #define CMD_INV_ALL 0x08 | ||
132 | |||
133 | #define CMD_COMPL_WAIT_STORE_MASK 0x01 | ||
134 | #define CMD_COMPL_WAIT_INT_MASK 0x02 | ||
135 | #define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 | ||
136 | #define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 | ||
137 | |||
138 | #define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL | ||
139 | |||
140 | /* macros and definitions for device table entries */ | ||
141 | #define DEV_ENTRY_VALID 0x00 | ||
142 | #define DEV_ENTRY_TRANSLATION 0x01 | ||
143 | #define DEV_ENTRY_IR 0x3d | ||
144 | #define DEV_ENTRY_IW 0x3e | ||
145 | #define DEV_ENTRY_NO_PAGE_FAULT 0x62 | ||
146 | #define DEV_ENTRY_EX 0x67 | ||
147 | #define DEV_ENTRY_SYSMGT1 0x68 | ||
148 | #define DEV_ENTRY_SYSMGT2 0x69 | ||
149 | #define DEV_ENTRY_INIT_PASS 0xb8 | ||
150 | #define DEV_ENTRY_EINT_PASS 0xb9 | ||
151 | #define DEV_ENTRY_NMI_PASS 0xba | ||
152 | #define DEV_ENTRY_LINT0_PASS 0xbe | ||
153 | #define DEV_ENTRY_LINT1_PASS 0xbf | ||
154 | #define DEV_ENTRY_MODE_MASK 0x07 | ||
155 | #define DEV_ENTRY_MODE_SHIFT 0x09 | ||
156 | |||
157 | /* constants to configure the command buffer */ | ||
158 | #define CMD_BUFFER_SIZE 8192 | ||
159 | #define CMD_BUFFER_UNINITIALIZED 1 | ||
160 | #define CMD_BUFFER_ENTRIES 512 | ||
161 | #define MMIO_CMD_SIZE_SHIFT 56 | ||
162 | #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) | ||
163 | |||
164 | /* constants for event buffer handling */ | ||
165 | #define EVT_BUFFER_SIZE 8192 /* 512 entries */ | ||
166 | #define EVT_LEN_MASK (0x9ULL << 56) | ||
167 | |||
168 | #define PAGE_MODE_NONE 0x00 | ||
169 | #define PAGE_MODE_1_LEVEL 0x01 | ||
170 | #define PAGE_MODE_2_LEVEL 0x02 | ||
171 | #define PAGE_MODE_3_LEVEL 0x03 | ||
172 | #define PAGE_MODE_4_LEVEL 0x04 | ||
173 | #define PAGE_MODE_5_LEVEL 0x05 | ||
174 | #define PAGE_MODE_6_LEVEL 0x06 | ||
175 | |||
176 | #define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) | ||
177 | #define PM_LEVEL_SIZE(x) (((x) < 6) ? \ | ||
178 | ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ | ||
179 | (0xffffffffffffffffULL)) | ||
180 | #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) | ||
181 | #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) | ||
182 | #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ | ||
183 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) | ||
184 | #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) | ||
185 | |||
186 | #define PM_MAP_4k 0 | ||
187 | #define PM_ADDR_MASK 0x000ffffffffff000ULL | ||
188 | #define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ | ||
189 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | ||
190 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | ||
191 | |||
192 | /* | ||
193 | * Returns the page table level to use for a given page size | ||
194 | * Pagesize is expected to be a power-of-two | ||
195 | */ | ||
196 | #define PAGE_SIZE_LEVEL(pagesize) \ | ||
197 | ((__ffs(pagesize) - 12) / 9) | ||
198 | /* | ||
199 | * Returns the number of ptes to use for a given page size | ||
200 | * Pagesize is expected to be a power-of-two | ||
201 | */ | ||
202 | #define PAGE_SIZE_PTE_COUNT(pagesize) \ | ||
203 | (1ULL << ((__ffs(pagesize) - 12) % 9)) | ||
204 | |||
205 | /* | ||
206 | * Aligns a given io-virtual address to a given page size | ||
207 | * Pagesize is expected to be a power-of-two | ||
208 | */ | ||
209 | #define PAGE_SIZE_ALIGN(address, pagesize) \ | ||
210 | ((address) & ~((pagesize) - 1)) | ||
211 | /* | ||
212 | * Creates an IOMMU PTE for an address an a given pagesize | ||
213 | * The PTE has no permission bits set | ||
214 | * Pagesize is expected to be a power-of-two larger than 4096 | ||
215 | */ | ||
216 | #define PAGE_SIZE_PTE(address, pagesize) \ | ||
217 | (((address) | ((pagesize) - 1)) & \ | ||
218 | (~(pagesize >> 1)) & PM_ADDR_MASK) | ||
219 | |||
220 | /* | ||
221 | * Takes a PTE value with mode=0x07 and returns the page size it maps | ||
222 | */ | ||
223 | #define PTE_PAGE_SIZE(pte) \ | ||
224 | (1ULL << (1 + ffz(((pte) | 0xfffULL)))) | ||
225 | |||
226 | #define IOMMU_PTE_P (1ULL << 0) | ||
227 | #define IOMMU_PTE_TV (1ULL << 1) | ||
228 | #define IOMMU_PTE_U (1ULL << 59) | ||
229 | #define IOMMU_PTE_FC (1ULL << 60) | ||
230 | #define IOMMU_PTE_IR (1ULL << 61) | ||
231 | #define IOMMU_PTE_IW (1ULL << 62) | ||
232 | |||
233 | #define DTE_FLAG_IOTLB 0x01 | ||
234 | |||
235 | #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) | ||
236 | #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) | ||
237 | #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) | ||
238 | #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) | ||
239 | |||
240 | #define IOMMU_PROT_MASK 0x03 | ||
241 | #define IOMMU_PROT_IR 0x01 | ||
242 | #define IOMMU_PROT_IW 0x02 | ||
243 | |||
244 | /* IOMMU capabilities */ | ||
245 | #define IOMMU_CAP_IOTLB 24 | ||
246 | #define IOMMU_CAP_NPCACHE 26 | ||
247 | #define IOMMU_CAP_EFR 27 | ||
248 | |||
249 | #define MAX_DOMAIN_ID 65536 | ||
250 | |||
251 | /* FIXME: move this macro to <linux/pci.h> */ | ||
252 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
253 | |||
254 | /* Protection domain flags */ | ||
255 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ | ||
256 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops | ||
257 | domain for an IOMMU */ | ||
258 | #define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page | ||
259 | translation */ | ||
260 | |||
261 | extern bool amd_iommu_dump; | ||
262 | #define DUMP_printk(format, arg...) \ | ||
263 | do { \ | ||
264 | if (amd_iommu_dump) \ | ||
265 | printk(KERN_INFO "AMD-Vi: " format, ## arg); \ | ||
266 | } while(0); | ||
267 | |||
268 | /* global flag if IOMMUs cache non-present entries */ | ||
269 | extern bool amd_iommu_np_cache; | ||
270 | /* Only true if all IOMMUs support device IOTLBs */ | ||
271 | extern bool amd_iommu_iotlb_sup; | ||
272 | |||
273 | /* | ||
274 | * Make iterating over all IOMMUs easier | ||
275 | */ | ||
276 | #define for_each_iommu(iommu) \ | ||
277 | list_for_each_entry((iommu), &amd_iommu_list, list) | ||
278 | #define for_each_iommu_safe(iommu, next) \ | ||
279 | list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) | ||
280 | |||
281 | #define APERTURE_RANGE_SHIFT 27 /* 128 MB */ | ||
282 | #define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) | ||
283 | #define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) | ||
284 | #define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ | ||
285 | #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) | ||
286 | #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) | ||
287 | |||
288 | /* | ||
289 | * This structure contains generic data for IOMMU protection domains | ||
290 | * independent of their use. | ||
291 | */ | ||
292 | struct protection_domain { | ||
293 | struct list_head list; /* for list of all protection domains */ | ||
294 | struct list_head dev_list; /* List of all devices in this domain */ | ||
295 | spinlock_t lock; /* mostly used to lock the page table*/ | ||
296 | struct mutex api_lock; /* protect page tables in the iommu-api path */ | ||
297 | u16 id; /* the domain id written to the device table */ | ||
298 | int mode; /* paging mode (0-6 levels) */ | ||
299 | u64 *pt_root; /* page table root pointer */ | ||
300 | unsigned long flags; /* flags to find out type of domain */ | ||
301 | bool updated; /* complete domain flush required */ | ||
302 | unsigned dev_cnt; /* devices assigned to this domain */ | ||
303 | unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ | ||
304 | void *priv; /* private data */ | ||
305 | |||
306 | }; | ||
307 | |||
308 | /* | ||
309 | * This struct contains device specific data for the IOMMU | ||
310 | */ | ||
311 | struct iommu_dev_data { | ||
312 | struct list_head list; /* For domain->dev_list */ | ||
313 | struct device *dev; /* Device this data belong to */ | ||
314 | struct device *alias; /* The Alias Device */ | ||
315 | struct protection_domain *domain; /* Domain the device is bound to */ | ||
316 | atomic_t bind; /* Domain attach reverent count */ | ||
317 | }; | ||
318 | |||
319 | /* | ||
320 | * For dynamic growth the aperture size is split into ranges of 128MB of | ||
321 | * DMA address space each. This struct represents one such range. | ||
322 | */ | ||
323 | struct aperture_range { | ||
324 | |||
325 | /* address allocation bitmap */ | ||
326 | unsigned long *bitmap; | ||
327 | |||
328 | /* | ||
329 | * Array of PTE pages for the aperture. In this array we save all the | ||
330 | * leaf pages of the domain page table used for the aperture. This way | ||
331 | * we don't need to walk the page table to find a specific PTE. We can | ||
332 | * just calculate its address in constant time. | ||
333 | */ | ||
334 | u64 *pte_pages[64]; | ||
335 | |||
336 | unsigned long offset; | ||
337 | }; | ||
338 | |||
339 | /* | ||
340 | * Data container for a dma_ops specific protection domain | ||
341 | */ | ||
342 | struct dma_ops_domain { | ||
343 | struct list_head list; | ||
344 | |||
345 | /* generic protection domain information */ | ||
346 | struct protection_domain domain; | ||
347 | |||
348 | /* size of the aperture for the mappings */ | ||
349 | unsigned long aperture_size; | ||
350 | |||
351 | /* address we start to search for free addresses */ | ||
352 | unsigned long next_address; | ||
353 | |||
354 | /* address space relevant data */ | ||
355 | struct aperture_range *aperture[APERTURE_MAX_RANGES]; | ||
356 | |||
357 | /* This will be set to true when TLB needs to be flushed */ | ||
358 | bool need_flush; | ||
359 | |||
360 | /* | ||
361 | * if this is a preallocated domain, keep the device for which it was | ||
362 | * preallocated in this variable | ||
363 | */ | ||
364 | u16 target_dev; | ||
365 | }; | ||
366 | |||
367 | /* | ||
368 | * Structure where we save information about one hardware AMD IOMMU in the | ||
369 | * system. | ||
370 | */ | ||
371 | struct amd_iommu { | ||
372 | struct list_head list; | ||
373 | |||
374 | /* Index within the IOMMU array */ | ||
375 | int index; | ||
376 | |||
377 | /* locks the accesses to the hardware */ | ||
378 | spinlock_t lock; | ||
379 | |||
380 | /* Pointer to PCI device of this IOMMU */ | ||
381 | struct pci_dev *dev; | ||
382 | |||
383 | /* physical address of MMIO space */ | ||
384 | u64 mmio_phys; | ||
385 | /* virtual address of MMIO space */ | ||
386 | u8 *mmio_base; | ||
387 | |||
388 | /* capabilities of that IOMMU read from ACPI */ | ||
389 | u32 cap; | ||
390 | |||
391 | /* flags read from acpi table */ | ||
392 | u8 acpi_flags; | ||
393 | |||
394 | /* Extended features */ | ||
395 | u64 features; | ||
396 | |||
397 | /* | ||
398 | * Capability pointer. There could be more than one IOMMU per PCI | ||
399 | * device function if there are more than one AMD IOMMU capability | ||
400 | * pointers. | ||
401 | */ | ||
402 | u16 cap_ptr; | ||
403 | |||
404 | /* pci domain of this IOMMU */ | ||
405 | u16 pci_seg; | ||
406 | |||
407 | /* first device this IOMMU handles. read from PCI */ | ||
408 | u16 first_device; | ||
409 | /* last device this IOMMU handles. read from PCI */ | ||
410 | u16 last_device; | ||
411 | |||
412 | /* start of exclusion range of that IOMMU */ | ||
413 | u64 exclusion_start; | ||
414 | /* length of exclusion range of that IOMMU */ | ||
415 | u64 exclusion_length; | ||
416 | |||
417 | /* command buffer virtual address */ | ||
418 | u8 *cmd_buf; | ||
419 | /* size of command buffer */ | ||
420 | u32 cmd_buf_size; | ||
421 | |||
422 | /* size of event buffer */ | ||
423 | u32 evt_buf_size; | ||
424 | /* event buffer virtual address */ | ||
425 | u8 *evt_buf; | ||
426 | /* MSI number for event interrupt */ | ||
427 | u16 evt_msi_num; | ||
428 | |||
429 | /* true if interrupts for this IOMMU are already enabled */ | ||
430 | bool int_enabled; | ||
431 | |||
432 | /* if one, we need to send a completion wait command */ | ||
433 | bool need_sync; | ||
434 | |||
435 | /* default dma_ops domain for that IOMMU */ | ||
436 | struct dma_ops_domain *default_dom; | ||
437 | |||
438 | /* | ||
439 | * We can't rely on the BIOS to restore all values on reinit, so we | ||
440 | * need to stash them | ||
441 | */ | ||
442 | |||
443 | /* The iommu BAR */ | ||
444 | u32 stored_addr_lo; | ||
445 | u32 stored_addr_hi; | ||
446 | |||
447 | /* | ||
448 | * Each iommu has 6 l1s, each of which is documented as having 0x12 | ||
449 | * registers | ||
450 | */ | ||
451 | u32 stored_l1[6][0x12]; | ||
452 | |||
453 | /* The l2 indirect registers */ | ||
454 | u32 stored_l2[0x83]; | ||
455 | }; | ||
456 | |||
457 | /* | ||
458 | * List with all IOMMUs in the system. This list is not locked because it is | ||
459 | * only written and read at driver initialization or suspend time | ||
460 | */ | ||
461 | extern struct list_head amd_iommu_list; | ||
462 | |||
463 | /* | ||
464 | * Array with pointers to each IOMMU struct | ||
465 | * The indices are referenced in the protection domains | ||
466 | */ | ||
467 | extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
468 | |||
469 | /* Number of IOMMUs present in the system */ | ||
470 | extern int amd_iommus_present; | ||
471 | |||
472 | /* | ||
473 | * Declarations for the global list of all protection domains | ||
474 | */ | ||
475 | extern spinlock_t amd_iommu_pd_lock; | ||
476 | extern struct list_head amd_iommu_pd_list; | ||
477 | |||
478 | /* | ||
479 | * Structure defining one entry in the device table | ||
480 | */ | ||
481 | struct dev_table_entry { | ||
482 | u32 data[8]; | ||
483 | }; | ||
484 | |||
485 | /* | ||
486 | * One entry for unity mappings parsed out of the ACPI table. | ||
487 | */ | ||
488 | struct unity_map_entry { | ||
489 | struct list_head list; | ||
490 | |||
491 | /* starting device id this entry is used for (including) */ | ||
492 | u16 devid_start; | ||
493 | /* end device id this entry is used for (including) */ | ||
494 | u16 devid_end; | ||
495 | |||
496 | /* start address to unity map (including) */ | ||
497 | u64 address_start; | ||
498 | /* end address to unity map (including) */ | ||
499 | u64 address_end; | ||
500 | |||
501 | /* required protection */ | ||
502 | int prot; | ||
503 | }; | ||
504 | |||
505 | /* | ||
506 | * List of all unity mappings. It is not locked because as runtime it is only | ||
507 | * read. It is created at ACPI table parsing time. | ||
508 | */ | ||
509 | extern struct list_head amd_iommu_unity_map; | ||
510 | |||
511 | /* | ||
512 | * Data structures for device handling | ||
513 | */ | ||
514 | |||
515 | /* | ||
516 | * Device table used by hardware. Read and write accesses by software are | ||
517 | * locked with the amd_iommu_pd_table lock. | ||
518 | */ | ||
519 | extern struct dev_table_entry *amd_iommu_dev_table; | ||
520 | |||
521 | /* | ||
522 | * Alias table to find requestor ids to device ids. Not locked because only | ||
523 | * read on runtime. | ||
524 | */ | ||
525 | extern u16 *amd_iommu_alias_table; | ||
526 | |||
527 | /* | ||
528 | * Reverse lookup table to find the IOMMU which translates a specific device. | ||
529 | */ | ||
530 | extern struct amd_iommu **amd_iommu_rlookup_table; | ||
531 | |||
532 | /* size of the dma_ops aperture as power of 2 */ | ||
533 | extern unsigned amd_iommu_aperture_order; | ||
534 | |||
535 | /* largest PCI device id we expect translation requests for */ | ||
536 | extern u16 amd_iommu_last_bdf; | ||
537 | |||
538 | /* allocation bitmap for domain ids */ | ||
539 | extern unsigned long *amd_iommu_pd_alloc_bitmap; | ||
540 | |||
541 | /* | ||
542 | * If true, the addresses will be flushed on unmap time, not when | ||
543 | * they are reused | ||
544 | */ | ||
545 | extern bool amd_iommu_unmap_flush; | ||
546 | |||
547 | /* takes bus and device/function and returns the device id | ||
548 | * FIXME: should that be in generic PCI code? */ | ||
549 | static inline u16 calc_devid(u8 bus, u8 devfn) | ||
550 | { | ||
551 | return (((u16)bus) << 8) | devfn; | ||
552 | } | ||
553 | |||
554 | #ifdef CONFIG_AMD_IOMMU_STATS | ||
555 | |||
556 | struct __iommu_counter { | ||
557 | char *name; | ||
558 | struct dentry *dent; | ||
559 | u64 value; | ||
560 | }; | ||
561 | |||
562 | #define DECLARE_STATS_COUNTER(nm) \ | ||
563 | static struct __iommu_counter nm = { \ | ||
564 | .name = #nm, \ | ||
565 | } | ||
566 | |||
567 | #define INC_STATS_COUNTER(name) name.value += 1 | ||
568 | #define ADD_STATS_COUNTER(name, x) name.value += (x) | ||
569 | #define SUB_STATS_COUNTER(name, x) name.value -= (x) | ||
570 | |||
571 | #else /* CONFIG_AMD_IOMMU_STATS */ | ||
572 | |||
573 | #define DECLARE_STATS_COUNTER(name) | ||
574 | #define INC_STATS_COUNTER(name) | ||
575 | #define ADD_STATS_COUNTER(name, x) | ||
576 | #define SUB_STATS_COUNTER(name, x) | ||
577 | |||
578 | #endif /* CONFIG_AMD_IOMMU_STATS */ | ||
579 | |||
580 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | ||
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index af60d8a2e288..0acbac299e49 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -18,24 +18,6 @@
18 | 18 | ||
19 | #ifdef CONFIG_APB_TIMER | 19 | #ifdef CONFIG_APB_TIMER |
20 | 20 | ||
21 | /* Langwell DW APB timer registers */ | ||
22 | #define APBTMR_N_LOAD_COUNT 0x00 | ||
23 | #define APBTMR_N_CURRENT_VALUE 0x04 | ||
24 | #define APBTMR_N_CONTROL 0x08 | ||
25 | #define APBTMR_N_EOI 0x0c | ||
26 | #define APBTMR_N_INT_STATUS 0x10 | ||
27 | |||
28 | #define APBTMRS_INT_STATUS 0xa0 | ||
29 | #define APBTMRS_EOI 0xa4 | ||
30 | #define APBTMRS_RAW_INT_STATUS 0xa8 | ||
31 | #define APBTMRS_COMP_VERSION 0xac | ||
32 | #define APBTMRS_REG_SIZE 0x14 | ||
33 | |||
34 | /* register bits */ | ||
35 | #define APBTMR_CONTROL_ENABLE (1<<0) | ||
36 | #define APBTMR_CONTROL_MODE_PERIODIC (1<<1) /*1: periodic 0:free running */ | ||
37 | #define APBTMR_CONTROL_INT (1<<2) | ||
38 | |||
39 | /* default memory mapped register base */ | 21 | /* default memory mapped register base */ |
40 | #define LNW_SCU_ADDR 0xFF100000 | 22 | #define LNW_SCU_ADDR 0xFF100000 |
41 | #define LNW_EXT_TIMER_OFFSET 0x1B800 | 23 | #define LNW_EXT_TIMER_OFFSET 0x1B800 |
@@ -43,14 +25,13 @@
43 | #define LNW_EXT_TIMER_PGOFFSET 0x800 | 25 | #define LNW_EXT_TIMER_PGOFFSET 0x800 |
44 | 26 | ||
45 | /* APBT clock speed range from PCLK to fabric base, 25-100MHz */ | 27 | /* APBT clock speed range from PCLK to fabric base, 25-100MHz */ |
46 | #define APBT_MAX_FREQ 50 | 28 | #define APBT_MAX_FREQ 50000000 |
47 | #define APBT_MIN_FREQ 1 | 29 | #define APBT_MIN_FREQ 1000000 |
48 | #define APBT_MMAP_SIZE 1024 | 30 | #define APBT_MMAP_SIZE 1024 |
49 | 31 | ||
50 | #define APBT_DEV_USED 1 | 32 | #define APBT_DEV_USED 1 |
51 | 33 | ||
52 | extern void apbt_time_init(void); | 34 | extern void apbt_time_init(void); |
53 | extern struct clock_event_device *global_clock_event; | ||
54 | extern unsigned long apbt_quick_calibrate(void); | 35 | extern unsigned long apbt_quick_calibrate(void); |
55 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); | 36 | extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); |
56 | extern void apbt_setup_secondary_clock(void); | 37 | extern void apbt_setup_secondary_clock(void); |
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index b3ed1e1460ff..9412d6558c88 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,9 +3,11 @@
3 | 3 | ||
4 | #ifdef __ASSEMBLY__ | 4 | #ifdef __ASSEMBLY__ |
5 | # define __ASM_FORM(x) x | 5 | # define __ASM_FORM(x) x |
6 | # define __ASM_FORM_COMMA(x) x, | ||
6 | # define __ASM_EX_SEC .section __ex_table, "a" | 7 | # define __ASM_EX_SEC .section __ex_table, "a" |
7 | #else | 8 | #else |
8 | # define __ASM_FORM(x) " " #x " " | 9 | # define __ASM_FORM(x) " " #x " " |
10 | # define __ASM_FORM_COMMA(x) " " #x "," | ||
9 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" | 11 | # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" |
10 | #endif | 12 | #endif |
11 | 13 | ||
@@ -15,7 +17,8 @@
15 | # define __ASM_SEL(a,b) __ASM_FORM(b) | 17 | # define __ASM_SEL(a,b) __ASM_FORM(b) |
16 | #endif | 18 | #endif |
17 | 19 | ||
18 | #define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q) | 20 | #define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ |
21 | inst##q##__VA_ARGS__) | ||
19 | #define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) | 22 | #define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) |
20 | 23 | ||
21 | #define _ASM_PTR __ASM_SEL(.long, .quad) | 24 | #define _ASM_PTR __ASM_SEL(.long, .quad) |
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 30af5a832163..a9e3a740f697 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -46,6 +46,7 @@ For 32-bit we have the following conventions - kernel is built with
46 | 46 | ||
47 | */ | 47 | */ |
48 | 48 | ||
49 | #include "dwarf2.h" | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * 64-bit system call stack frame layout defines and helpers, for | 52 | * 64-bit system call stack frame layout defines and helpers, for |
@@ -84,72 +85,57 @@ For 32-bit we have the following conventions - kernel is built with
84 | #define ARGOFFSET R11 | 85 | #define ARGOFFSET R11 |
85 | #define SWFRAME ORIG_RAX | 86 | #define SWFRAME ORIG_RAX |
86 | 87 | ||
87 | .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0 | 88 | .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 |
88 | subq $9*8+\addskip, %rsp | 89 | subq $9*8+\addskip, %rsp |
89 | CFI_ADJUST_CFA_OFFSET 9*8+\addskip | 90 | CFI_ADJUST_CFA_OFFSET 9*8+\addskip |
90 | movq %rdi, 8*8(%rsp) | 91 | movq_cfi rdi, 8*8 |
91 | CFI_REL_OFFSET rdi, 8*8 | 92 | movq_cfi rsi, 7*8 |
92 | movq %rsi, 7*8(%rsp) | 93 | movq_cfi rdx, 6*8 |
93 | CFI_REL_OFFSET rsi, 7*8 | 94 | |
94 | movq %rdx, 6*8(%rsp) | 95 | .if \save_rcx |
95 | CFI_REL_OFFSET rdx, 6*8 | 96 | movq_cfi rcx, 5*8 |
96 | .if \norcx | ||
97 | .else | ||
98 | movq %rcx, 5*8(%rsp) | ||
99 | CFI_REL_OFFSET rcx, 5*8 | ||
100 | .endif | 97 | .endif |
101 | movq %rax, 4*8(%rsp) | 98 | |
102 | CFI_REL_OFFSET rax, 4*8 | 99 | movq_cfi rax, 4*8 |
103 | .if \nor891011 | 100 | |
104 | .else | 101 | .if \save_r891011 |
105 | movq %r8, 3*8(%rsp) | 102 | movq_cfi r8, 3*8 |
106 | CFI_REL_OFFSET r8, 3*8 | 103 | movq_cfi r9, 2*8 |
107 | movq %r9, 2*8(%rsp) | 104 | movq_cfi r10, 1*8 |
108 | CFI_REL_OFFSET r9, 2*8 | 105 | movq_cfi r11, 0*8 |
109 | movq %r10, 1*8(%rsp) | ||
110 | CFI_REL_OFFSET r10, 1*8 | ||
111 | movq %r11, (%rsp) | ||
112 | CFI_REL_OFFSET r11, 0*8 | ||
113 | .endif | 106 | .endif |
107 | |||
114 | .endm | 108 | .endm |
115 | 109 | ||
116 | #define ARG_SKIP (9*8) | 110 | #define ARG_SKIP (9*8) |
117 | 111 | ||
118 | .macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \ | 112 | .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ |
119 | skipr8910=0, skiprdx=0 | 113 | rstor_r8910=1, rstor_rdx=1 |
120 | .if \skipr11 | 114 | .if \rstor_r11 |
121 | .else | 115 | movq_cfi_restore 0*8, r11 |
122 | movq (%rsp), %r11 | ||
123 | CFI_RESTORE r11 | ||
124 | .endif | 116 | .endif |
125 | .if \skipr8910 | 117 | |
126 | .else | 118 | .if \rstor_r8910 |
127 | movq 1*8(%rsp), %r10 | 119 | movq_cfi_restore 1*8, r10 |
128 | CFI_RESTORE r10 | 120 | movq_cfi_restore 2*8, r9 |
129 | movq 2*8(%rsp), %r9 | 121 | movq_cfi_restore 3*8, r8 |
130 | CFI_RESTORE r9 | ||
131 | movq 3*8(%rsp), %r8 | ||
132 | CFI_RESTORE r8 | ||
133 | .endif | 122 | .endif |
134 | .if \skiprax | 123 | |
135 | .else | 124 | .if \rstor_rax |
136 | movq 4*8(%rsp), %rax | 125 | movq_cfi_restore 4*8, rax |
137 | CFI_RESTORE rax | ||
138 | .endif | 126 | .endif |
139 | .if \skiprcx | 127 | |
140 | .else | 128 | .if \rstor_rcx |
141 | movq 5*8(%rsp), %rcx | 129 | movq_cfi_restore 5*8, rcx |
142 | CFI_RESTORE rcx | ||
143 | .endif | 130 | .endif |
144 | .if \skiprdx | 131 | |
145 | .else | 132 | .if \rstor_rdx |
146 | movq 6*8(%rsp), %rdx | 133 | movq_cfi_restore 6*8, rdx |
147 | CFI_RESTORE rdx | ||
148 | .endif | 134 | .endif |
149 | movq 7*8(%rsp), %rsi | 135 | |
150 | CFI_RESTORE rsi | 136 | movq_cfi_restore 7*8, rsi |
151 | movq 8*8(%rsp), %rdi | 137 | movq_cfi_restore 8*8, rdi |
152 | CFI_RESTORE rdi | 138 | |
153 | .if ARG_SKIP+\addskip > 0 | 139 | .if ARG_SKIP+\addskip > 0 |
154 | addq $ARG_SKIP+\addskip, %rsp | 140 | addq $ARG_SKIP+\addskip, %rsp |
155 | CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) | 141 | CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) |
@@ -176,33 +162,21 @@ For 32-bit we have the following conventions - kernel is built with | |||
176 | .macro SAVE_REST | 162 | .macro SAVE_REST |
177 | subq $REST_SKIP, %rsp | 163 | subq $REST_SKIP, %rsp |
178 | CFI_ADJUST_CFA_OFFSET REST_SKIP | 164 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
179 | movq %rbx, 5*8(%rsp) | 165 | movq_cfi rbx, 5*8 |
180 | CFI_REL_OFFSET rbx, 5*8 | 166 | movq_cfi rbp, 4*8 |
181 | movq %rbp, 4*8(%rsp) | 167 | movq_cfi r12, 3*8 |
182 | CFI_REL_OFFSET rbp, 4*8 | 168 | movq_cfi r13, 2*8 |
183 | movq %r12, 3*8(%rsp) | 169 | movq_cfi r14, 1*8 |
184 | CFI_REL_OFFSET r12, 3*8 | 170 | movq_cfi r15, 0*8 |
185 | movq %r13, 2*8(%rsp) | ||
186 | CFI_REL_OFFSET r13, 2*8 | ||
187 | movq %r14, 1*8(%rsp) | ||
188 | CFI_REL_OFFSET r14, 1*8 | ||
189 | movq %r15, (%rsp) | ||
190 | CFI_REL_OFFSET r15, 0*8 | ||
191 | .endm | 171 | .endm |
192 | 172 | ||
193 | .macro RESTORE_REST | 173 | .macro RESTORE_REST |
194 | movq (%rsp), %r15 | 174 | movq_cfi_restore 0*8, r15 |
195 | CFI_RESTORE r15 | 175 | movq_cfi_restore 1*8, r14 |
196 | movq 1*8(%rsp), %r14 | 176 | movq_cfi_restore 2*8, r13 |
197 | CFI_RESTORE r14 | 177 | movq_cfi_restore 3*8, r12 |
198 | movq 2*8(%rsp), %r13 | 178 | movq_cfi_restore 4*8, rbp |
199 | CFI_RESTORE r13 | 179 | movq_cfi_restore 5*8, rbx |
200 | movq 3*8(%rsp), %r12 | ||
201 | CFI_RESTORE r12 | ||
202 | movq 4*8(%rsp), %rbp | ||
203 | CFI_RESTORE rbp | ||
204 | movq 5*8(%rsp), %rbx | ||
205 | CFI_RESTORE rbx | ||
206 | addq $REST_SKIP, %rsp | 180 | addq $REST_SKIP, %rsp |
207 | CFI_ADJUST_CFA_OFFSET -(REST_SKIP) | 181 | CFI_ADJUST_CFA_OFFSET -(REST_SKIP) |
208 | .endm | 182 | .endm |
@@ -214,7 +188,7 @@ For 32-bit we have the following conventions - kernel is built with | |||
214 | 188 | ||
215 | .macro RESTORE_ALL addskip=0 | 189 | .macro RESTORE_ALL addskip=0 |
216 | RESTORE_REST | 190 | RESTORE_REST |
217 | RESTORE_ARGS 0, \addskip | 191 | RESTORE_ARGS 1, \addskip |
218 | .endm | 192 | .endm |
219 | 193 | ||
220 | .macro icebp | 194 | .macro icebp |
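SAVE_ARGS/RESTORE_ARGS switch from negative skip flags (1 = do not save) to positive save/restore flags defaulting to 1, so every call site has to invert its literal arguments. A hypothetical call site, shown only to make the polarity change concrete:

    /* old interface: SAVE_ARGS addskip, norcx, nor891011 */
    SAVE_ARGS 8, 1          /* addskip=8, skip %rcx */
    /* new interface: SAVE_ARGS addskip, save_rcx, save_r891011 */
    SAVE_ARGS 8, 0          /* addskip=8, skip %rcx -- same effect */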
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h new file mode 100644 index 000000000000..0bdbbb3b9ce7 --- /dev/null +++ b/arch/x86/include/asm/clocksource.h | |||
@@ -0,0 +1,18 @@ | |||
1 | /* x86-specific clocksource additions */ | ||
2 | |||
3 | #ifndef _ASM_X86_CLOCKSOURCE_H | ||
4 | #define _ASM_X86_CLOCKSOURCE_H | ||
5 | |||
6 | #ifdef CONFIG_X86_64 | ||
7 | |||
8 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ | ||
9 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ | ||
10 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ | ||
11 | |||
12 | struct arch_clocksource_data { | ||
13 | int vclock_mode; | ||
14 | }; | ||
15 | |||
16 | #endif /* CONFIG_X86_64 */ | ||
17 | |||
18 | #endif /* _ASM_X86_CLOCKSOURCE_H */ | ||
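The new arch_clocksource_data lets a clocksource advertise whether the vDSO may read it directly. A minimal sketch of a driver opting in, assuming struct clocksource carries an archdata member of this type when CONFIG_ARCH_CLOCKSOURCE_DATA=y; the driver name and read callback are hypothetical:

    #include <linux/clocksource.h>
    #include <asm/msr.h>

    /* Hypothetical clocksource that is safe to read from the vDSO. */
    static cycle_t example_read(struct clocksource *cs)
    {
            return (cycle_t)native_read_tsc();
    }

    static struct clocksource example_clocksource = {
            .name           = "example-tsc",
            .rating         = 300,
            .read           = example_read,
            .mask           = CLOCKSOURCE_MASK(64),
            .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
            .archdata       = { .vclock_mode = VCLOCK_TSC },
    };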
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 284a6e8f7ce1..3deb7250624c 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -280,4 +280,52 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | |||
280 | 280 | ||
281 | #endif | 281 | #endif |
282 | 282 | ||
283 | #define cmpxchg8b(ptr, o1, o2, n1, n2) \ | ||
284 | ({ \ | ||
285 | char __ret; \ | ||
286 | __typeof__(o2) __dummy; \ | ||
287 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
288 | __typeof__(o2) __old2 = (o2); \ | ||
289 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
290 | __typeof__(o2) __new2 = (n2); \ | ||
291 | asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ | ||
292 | : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ | ||
293 | : "a" (__old1), "d"(__old2), \ | ||
294 | "b" (__new1), "c" (__new2) \ | ||
295 | : "memory"); \ | ||
296 | __ret; }) | ||
297 | |||
298 | |||
299 | #define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ | ||
300 | ({ \ | ||
301 | char __ret; \ | ||
302 | __typeof__(o2) __dummy; \ | ||
303 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
304 | __typeof__(o2) __old2 = (o2); \ | ||
305 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
306 | __typeof__(o2) __new2 = (n2); \ | ||
307 | asm volatile("cmpxchg8b %2; setz %1" \ | ||
308 | : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ | ||
309 | : "a" (__old), "d"(__old2), \ | ||
310 | "b" (__new1), "c" (__new2), \ | ||
311 | : "memory"); \ | ||
312 | __ret; }) | ||
313 | |||
314 | |||
315 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
316 | ({ \ | ||
317 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
318 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
319 | cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ | ||
320 | }) | ||
321 | |||
322 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
323 | ({ \ | ||
324 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
325 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
326 | cmpxchg8b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
327 | }) | ||
328 | |||
329 | #define system_has_cmpxchg_double() cpu_has_cx8 | ||
330 | |||
283 | #endif /* _ASM_X86_CMPXCHG_32_H */ | 331 | #endif /* _ASM_X86_CMPXCHG_32_H */ |
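cmpxchg_double() compares and swaps two adjacent machine words in one instruction; on 32-bit the pair must be 8-byte aligned and the CPU must have CMPXCHG8B. A minimal caller sketch with hypothetical names (struct pair, try_update):

    #include <linux/types.h>

    struct pair {
            unsigned long lo;               /* 4 bytes each on i386 */
            unsigned long hi;
    } __attribute__((aligned(8)));

    static bool try_update(struct pair *p,
                           unsigned long old_lo, unsigned long old_hi,
                           unsigned long new_lo, unsigned long new_hi)
    {
            if (!system_has_cmpxchg_double())
                    return false;           /* no CMPXCHG8B on this CPU */
            return cmpxchg_double(&p->lo, old_lo, old_hi, new_lo, new_hi);
    }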
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 423ae58aa020..7cf5c0a24434 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h | |||
@@ -151,4 +151,49 @@ extern void __cmpxchg_wrong_size(void); | |||
151 | cmpxchg_local((ptr), (o), (n)); \ | 151 | cmpxchg_local((ptr), (o), (n)); \ |
152 | }) | 152 | }) |
153 | 153 | ||
154 | #define cmpxchg16b(ptr, o1, o2, n1, n2) \ | ||
155 | ({ \ | ||
156 | char __ret; \ | ||
157 | __typeof__(o2) __junk; \ | ||
158 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
159 | __typeof__(o2) __old2 = (o2); \ | ||
160 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
161 | __typeof__(o2) __new2 = (n2); \ | ||
162 | asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ | ||
163 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
164 | : "b"(__new1), "c"(__new2), \ | ||
165 | "a"(__old1), "d"(__old2)); \ | ||
166 | __ret; }) | ||
167 | |||
168 | |||
169 | #define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ | ||
170 | ({ \ | ||
171 | char __ret; \ | ||
172 | __typeof__(o2) __junk; \ | ||
173 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
174 | __typeof__(o2) __old2 = (o2); \ | ||
175 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
176 | __typeof__(o2) __new2 = (n2); \ | ||
177 | asm volatile("cmpxchg16b %2;setz %1" \ | ||
178 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
179 | : "b"(__new1), "c"(__new2), \ | ||
180 | "a"(__old1), "d"(__old2)); \ | ||
181 | __ret; }) | ||
182 | |||
183 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
184 | ({ \ | ||
185 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
186 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
187 | cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ | ||
188 | }) | ||
189 | |||
190 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
191 | ({ \ | ||
192 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
193 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
194 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
195 | }) | ||
196 | |||
197 | #define system_has_cmpxchg_double() cpu_has_cx16 | ||
198 | |||
154 | #endif /* _ASM_X86_CMPXCHG_64_H */ | 199 | #endif /* _ASM_X86_CMPXCHG_64_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 71cc3800712c..4258aac99a6e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -288,6 +288,8 @@ extern const char * const x86_power_flags[32]; | |||
288 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | 288 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) |
289 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) | 289 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) |
290 | #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) | 290 | #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) |
291 | #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) | ||
292 | #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) | ||
291 | 293 | ||
292 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 294 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
293 | # define cpu_has_invlpg 1 | 295 | # define cpu_has_invlpg 1 |
@@ -331,8 +333,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
331 | "2:\n" | 333 | "2:\n" |
332 | ".section .altinstructions,\"a\"\n" | 334 | ".section .altinstructions,\"a\"\n" |
333 | _ASM_ALIGN "\n" | 335 | _ASM_ALIGN "\n" |
334 | _ASM_PTR "1b\n" | 336 | " .long 1b - .\n" |
335 | _ASM_PTR "0\n" /* no replacement */ | 337 | " .long 0\n" /* no replacement */ |
336 | " .word %P0\n" /* feature bit */ | 338 | " .word %P0\n" /* feature bit */ |
337 | " .byte 2b - 1b\n" /* source len */ | 339 | " .byte 2b - 1b\n" /* source len */ |
338 | " .byte 0\n" /* replacement len */ | 340 | " .byte 0\n" /* replacement len */ |
@@ -349,8 +351,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) | |||
349 | "2:\n" | 351 | "2:\n" |
350 | ".section .altinstructions,\"a\"\n" | 352 | ".section .altinstructions,\"a\"\n" |
351 | _ASM_ALIGN "\n" | 353 | _ASM_ALIGN "\n" |
352 | _ASM_PTR "1b\n" | 354 | " .long 1b - .\n" |
353 | _ASM_PTR "3f\n" | 355 | " .long 3f - .\n" |
354 | " .word %P1\n" /* feature bit */ | 356 | " .word %P1\n" /* feature bit */ |
355 | " .byte 2b - 1b\n" /* source len */ | 357 | " .byte 2b - 1b\n" /* source len */ |
356 | " .byte 4f - 3f\n" /* replacement len */ | 358 | " .byte 4f - 3f\n" /* replacement len */ |
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 409a649204aa..9b3b4f2754c7 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h | |||
@@ -1,30 +1,7 @@ | |||
1 | #ifndef _ASM_X86_DELAY_H | 1 | #ifndef _ASM_X86_DELAY_H |
2 | #define _ASM_X86_DELAY_H | 2 | #define _ASM_X86_DELAY_H |
3 | 3 | ||
4 | /* | 4 | #include <asm-generic/delay.h> |
5 | * Copyright (C) 1993 Linus Torvalds | ||
6 | * | ||
7 | * Delay routines calling functions in arch/x86/lib/delay.c | ||
8 | */ | ||
9 | |||
10 | /* Undefined functions to get compile-time errors */ | ||
11 | extern void __bad_udelay(void); | ||
12 | extern void __bad_ndelay(void); | ||
13 | |||
14 | extern void __udelay(unsigned long usecs); | ||
15 | extern void __ndelay(unsigned long nsecs); | ||
16 | extern void __const_udelay(unsigned long xloops); | ||
17 | extern void __delay(unsigned long loops); | ||
18 | |||
19 | /* 0x10c7 is 2**32 / 1000000 (rounded up) */ | ||
20 | #define udelay(n) (__builtin_constant_p(n) ? \ | ||
21 | ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ | ||
22 | __udelay(n)) | ||
23 | |||
24 | /* 0x5 is 2**32 / 1000000000 (rounded up) */ | ||
25 | #define ndelay(n) (__builtin_constant_p(n) ? \ | ||
26 | ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ | ||
27 | __ndelay(n)) | ||
28 | 5 | ||
29 | void use_tsc_delay(void); | 6 | void use_tsc_delay(void); |
30 | 7 | ||
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 1cd6d26a0a8d..0baa628e330c 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -53,8 +53,4 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) | |||
53 | BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) | 53 | BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | #ifdef CONFIG_X86_MCE | ||
57 | BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) | ||
58 | #endif | ||
59 | |||
60 | #endif | 56 | #endif |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4729b2b63117..460c74e4852c 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -78,6 +78,7 @@ enum fixed_addresses { | |||
78 | VSYSCALL_LAST_PAGE, | 78 | VSYSCALL_LAST_PAGE, |
79 | VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE | 79 | VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE |
80 | + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, | 80 | + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, |
81 | VVAR_PAGE, | ||
81 | VSYSCALL_HPET, | 82 | VSYSCALL_HPET, |
82 | #endif | 83 | #endif |
83 | FIX_DBGP_BASE, | 84 | FIX_DBGP_BASE, |
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 2c6fc9e62812..3b629f47eb65 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h | |||
@@ -1,5 +1,6 @@ | |||
1 | #ifdef __ASSEMBLY__ | 1 | #ifdef __ASSEMBLY__ |
2 | 2 | ||
3 | #include <asm/asm.h> | ||
3 | #include <asm/dwarf2.h> | 4 | #include <asm/dwarf2.h> |
4 | 5 | ||
5 | /* The annotation hides the frame from the unwinder and makes it look | 6 | /* The annotation hides the frame from the unwinder and makes it look |
@@ -7,13 +8,13 @@ | |||
7 | frame pointer later */ | 8 | frame pointer later */ |
8 | #ifdef CONFIG_FRAME_POINTER | 9 | #ifdef CONFIG_FRAME_POINTER |
9 | .macro FRAME | 10 | .macro FRAME |
10 | pushl_cfi %ebp | 11 | __ASM_SIZE(push,_cfi) %__ASM_REG(bp) |
11 | CFI_REL_OFFSET ebp,0 | 12 | CFI_REL_OFFSET __ASM_REG(bp), 0 |
12 | movl %esp,%ebp | 13 | __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) |
13 | .endm | 14 | .endm |
14 | .macro ENDFRAME | 15 | .macro ENDFRAME |
15 | popl_cfi %ebp | 16 | __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) |
16 | CFI_RESTORE ebp | 17 | CFI_RESTORE __ASM_REG(bp) |
17 | .endm | 18 | .endm |
18 | #else | 19 | #else |
19 | .macro FRAME | 20 | .macro FRAME |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bb9efe8706e2..13f5504c76c0 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -34,7 +34,6 @@ extern void irq_work_interrupt(void); | |||
34 | extern void spurious_interrupt(void); | 34 | extern void spurious_interrupt(void); |
35 | extern void thermal_interrupt(void); | 35 | extern void thermal_interrupt(void); |
36 | extern void reschedule_interrupt(void); | 36 | extern void reschedule_interrupt(void); |
37 | extern void mce_self_interrupt(void); | ||
38 | 37 | ||
39 | extern void invalidate_interrupt(void); | 38 | extern void invalidate_interrupt(void); |
40 | extern void invalidate_interrupt0(void); | 39 | extern void invalidate_interrupt0(void); |
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h deleted file mode 100644 index 65aaa91d5850..000000000000 --- a/arch/x86/include/asm/i8253.h +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | #ifndef _ASM_X86_I8253_H | ||
2 | #define _ASM_X86_I8253_H | ||
3 | |||
4 | /* i8253A PIT registers */ | ||
5 | #define PIT_MODE 0x43 | ||
6 | #define PIT_CH0 0x40 | ||
7 | #define PIT_CH2 0x42 | ||
8 | |||
9 | #define PIT_LATCH LATCH | ||
10 | |||
11 | extern raw_spinlock_t i8253_lock; | ||
12 | |||
13 | extern struct clock_event_device *global_clock_event; | ||
14 | |||
15 | extern void setup_pit_timer(void); | ||
16 | |||
17 | #define inb_pit inb_p | ||
18 | #define outb_pit outb_p | ||
19 | |||
20 | #endif /* _ASM_X86_I8253_H */ | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6e976ee3b3ef..f9a320984a10 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -17,7 +17,8 @@ | |||
17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
18 | * Vectors 32 ... 127 : device interrupts | 18 | * Vectors 32 ... 127 : device interrupts |
19 | * Vector 128 : legacy int80 syscall interface | 19 | * Vector 128 : legacy int80 syscall interface |
20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts | 20 | * Vector 204 : legacy x86_64 vsyscall emulation |
21 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts | ||
21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts | 22 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts |
22 | * | 23 | * |
23 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. | 24 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. |
@@ -50,6 +51,9 @@ | |||
50 | #ifdef CONFIG_X86_32 | 51 | #ifdef CONFIG_X86_32 |
51 | # define SYSCALL_VECTOR 0x80 | 52 | # define SYSCALL_VECTOR 0x80 |
52 | #endif | 53 | #endif |
54 | #ifdef CONFIG_X86_64 | ||
55 | # define VSYSCALL_EMU_VECTOR 0xcc | ||
56 | #endif | ||
53 | 57 | ||
54 | /* | 58 | /* |
55 | * Vectors 0x30-0x3f are used for ISA interrupts. | 59 | * Vectors 0x30-0x3f are used for ISA interrupts. |
@@ -109,11 +113,6 @@ | |||
109 | 113 | ||
110 | #define UV_BAU_MESSAGE 0xf5 | 114 | #define UV_BAU_MESSAGE 0xf5 |
111 | 115 | ||
112 | /* | ||
113 | * Self IPI vector for machine checks | ||
114 | */ | ||
115 | #define MCE_SELF_VECTOR 0xf4 | ||
116 | |||
117 | /* Xen vector callback to receive events in a HVM domain */ | 116 | /* Xen vector callback to receive events in a HVM domain */ |
118 | #define XEN_HVM_EVTCHN_CALLBACK 0xf3 | 117 | #define XEN_HVM_EVTCHN_CALLBACK 0xf3 |
119 | 118 | ||
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 5745ce8bf108..bba3cf88e624 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h | |||
@@ -60,23 +60,24 @@ static inline void native_halt(void) | |||
60 | #include <asm/paravirt.h> | 60 | #include <asm/paravirt.h> |
61 | #else | 61 | #else |
62 | #ifndef __ASSEMBLY__ | 62 | #ifndef __ASSEMBLY__ |
63 | #include <linux/types.h> | ||
63 | 64 | ||
64 | static inline unsigned long arch_local_save_flags(void) | 65 | static inline notrace unsigned long arch_local_save_flags(void) |
65 | { | 66 | { |
66 | return native_save_fl(); | 67 | return native_save_fl(); |
67 | } | 68 | } |
68 | 69 | ||
69 | static inline void arch_local_irq_restore(unsigned long flags) | 70 | static inline notrace void arch_local_irq_restore(unsigned long flags) |
70 | { | 71 | { |
71 | native_restore_fl(flags); | 72 | native_restore_fl(flags); |
72 | } | 73 | } |
73 | 74 | ||
74 | static inline void arch_local_irq_disable(void) | 75 | static inline notrace void arch_local_irq_disable(void) |
75 | { | 76 | { |
76 | native_irq_disable(); | 77 | native_irq_disable(); |
77 | } | 78 | } |
78 | 79 | ||
79 | static inline void arch_local_irq_enable(void) | 80 | static inline notrace void arch_local_irq_enable(void) |
80 | { | 81 | { |
81 | native_irq_enable(); | 82 | native_irq_enable(); |
82 | } | 83 | } |
@@ -102,7 +103,7 @@ static inline void halt(void) | |||
102 | /* | 103 | /* |
103 | * For spinlocks, etc: | 104 | * For spinlocks, etc: |
104 | */ | 105 | */ |
105 | static inline unsigned long arch_local_irq_save(void) | 106 | static inline notrace unsigned long arch_local_irq_save(void) |
106 | { | 107 | { |
107 | unsigned long flags = arch_local_save_flags(); | 108 | unsigned long flags = arch_local_save_flags(); |
108 | arch_local_irq_disable(); | 109 | arch_local_irq_disable(); |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0049211959c0..6040d115ef51 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -229,7 +229,26 @@ struct read_cache { | |||
229 | unsigned long end; | 229 | unsigned long end; |
230 | }; | 230 | }; |
231 | 231 | ||
232 | struct decode_cache { | 232 | struct x86_emulate_ctxt { |
233 | struct x86_emulate_ops *ops; | ||
234 | |||
235 | /* Register state before/after emulation. */ | ||
236 | unsigned long eflags; | ||
237 | unsigned long eip; /* eip before instruction emulation */ | ||
238 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | ||
239 | int mode; | ||
240 | |||
241 | /* interruptibility state, as a result of execution of STI or MOV SS */ | ||
242 | int interruptibility; | ||
243 | |||
244 | bool guest_mode; /* guest running a nested guest */ | ||
245 | bool perm_ok; /* do not check permissions if true */ | ||
246 | bool only_vendor_specific_insn; | ||
247 | |||
248 | bool have_exception; | ||
249 | struct x86_exception exception; | ||
250 | |||
251 | /* decode cache */ | ||
233 | u8 twobyte; | 252 | u8 twobyte; |
234 | u8 b; | 253 | u8 b; |
235 | u8 intercept; | 254 | u8 intercept; |
@@ -246,8 +265,6 @@ struct decode_cache { | |||
246 | unsigned int d; | 265 | unsigned int d; |
247 | int (*execute)(struct x86_emulate_ctxt *ctxt); | 266 | int (*execute)(struct x86_emulate_ctxt *ctxt); |
248 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); | 267 | int (*check_perm)(struct x86_emulate_ctxt *ctxt); |
249 | unsigned long regs[NR_VCPU_REGS]; | ||
250 | unsigned long eip; | ||
251 | /* modrm */ | 268 | /* modrm */ |
252 | u8 modrm; | 269 | u8 modrm; |
253 | u8 modrm_mod; | 270 | u8 modrm_mod; |
@@ -255,34 +272,14 @@ struct decode_cache { | |||
255 | u8 modrm_rm; | 272 | u8 modrm_rm; |
256 | u8 modrm_seg; | 273 | u8 modrm_seg; |
257 | bool rip_relative; | 274 | bool rip_relative; |
275 | unsigned long _eip; | ||
276 | /* Fields above regs are cleared together. */ | ||
277 | unsigned long regs[NR_VCPU_REGS]; | ||
258 | struct fetch_cache fetch; | 278 | struct fetch_cache fetch; |
259 | struct read_cache io_read; | 279 | struct read_cache io_read; |
260 | struct read_cache mem_read; | 280 | struct read_cache mem_read; |
261 | }; | 281 | }; |
262 | 282 | ||
263 | struct x86_emulate_ctxt { | ||
264 | struct x86_emulate_ops *ops; | ||
265 | |||
266 | /* Register state before/after emulation. */ | ||
267 | unsigned long eflags; | ||
268 | unsigned long eip; /* eip before instruction emulation */ | ||
269 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | ||
270 | int mode; | ||
271 | |||
272 | /* interruptibility state, as a result of execution of STI or MOV SS */ | ||
273 | int interruptibility; | ||
274 | |||
275 | bool guest_mode; /* guest running a nested guest */ | ||
276 | bool perm_ok; /* do not check permissions if true */ | ||
277 | bool only_vendor_specific_insn; | ||
278 | |||
279 | bool have_exception; | ||
280 | struct x86_exception exception; | ||
281 | |||
282 | /* decode cache */ | ||
283 | struct decode_cache decode; | ||
284 | }; | ||
285 | |||
286 | /* Repeat String Operation Prefix */ | 283 | /* Repeat String Operation Prefix */ |
287 | #define REPE_PREFIX 0xf3 | 284 | #define REPE_PREFIX 0xf3 |
288 | #define REPNE_PREFIX 0xf2 | 285 | #define REPNE_PREFIX 0xf2 |
@@ -373,6 +370,5 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); | |||
373 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 370 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
374 | u16 tss_selector, int reason, | 371 | u16 tss_selector, int reason, |
375 | bool has_error_code, u32 error_code); | 372 | bool has_error_code, u32 error_code); |
376 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, | 373 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); |
377 | struct x86_emulate_ops *ops, int irq); | ||
378 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 374 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d2ac8e2ee897..dd51c83aa5de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -48,7 +48,7 @@ | |||
48 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 48 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
49 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 49 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
50 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 50 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
51 | | X86_CR4_OSXSAVE \ | 51 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ |
52 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 52 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
53 | 53 | ||
54 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 54 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
@@ -205,6 +205,7 @@ union kvm_mmu_page_role { | |||
205 | unsigned invalid:1; | 205 | unsigned invalid:1; |
206 | unsigned nxe:1; | 206 | unsigned nxe:1; |
207 | unsigned cr0_wp:1; | 207 | unsigned cr0_wp:1; |
208 | unsigned smep_andnot_wp:1; | ||
208 | }; | 209 | }; |
209 | }; | 210 | }; |
210 | 211 | ||
@@ -227,15 +228,17 @@ struct kvm_mmu_page { | |||
227 | * in this shadow page. | 228 | * in this shadow page. |
228 | */ | 229 | */ |
229 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 230 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
230 | bool multimapped; /* More than one parent_pte? */ | ||
231 | bool unsync; | 231 | bool unsync; |
232 | int root_count; /* Currently serving as active root */ | 232 | int root_count; /* Currently serving as active root */ |
233 | unsigned int unsync_children; | 233 | unsigned int unsync_children; |
234 | union { | 234 | unsigned long parent_ptes; /* Reverse mapping for parent_pte */ |
235 | u64 *parent_pte; /* !multimapped */ | ||
236 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ | ||
237 | }; | ||
238 | DECLARE_BITMAP(unsync_child_bitmap, 512); | 235 | DECLARE_BITMAP(unsync_child_bitmap, 512); |
236 | |||
237 | #ifdef CONFIG_X86_32 | ||
238 | int clear_spte_count; | ||
239 | #endif | ||
240 | |||
241 | struct rcu_head rcu; | ||
239 | }; | 242 | }; |
240 | 243 | ||
241 | struct kvm_pv_mmu_op_buffer { | 244 | struct kvm_pv_mmu_op_buffer { |
@@ -269,8 +272,6 @@ struct kvm_mmu { | |||
269 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, | 272 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, |
270 | struct x86_exception *exception); | 273 | struct x86_exception *exception); |
271 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); | 274 | gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); |
272 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | ||
273 | struct kvm_mmu_page *page); | ||
274 | int (*sync_page)(struct kvm_vcpu *vcpu, | 275 | int (*sync_page)(struct kvm_vcpu *vcpu, |
275 | struct kvm_mmu_page *sp); | 276 | struct kvm_mmu_page *sp); |
276 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 277 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
@@ -346,8 +347,7 @@ struct kvm_vcpu_arch { | |||
346 | * put it here to avoid allocation */ | 347 | * put it here to avoid allocation */ |
347 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | 348 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; |
348 | 349 | ||
349 | struct kvm_mmu_memory_cache mmu_pte_chain_cache; | 350 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
350 | struct kvm_mmu_memory_cache mmu_rmap_desc_cache; | ||
351 | struct kvm_mmu_memory_cache mmu_page_cache; | 351 | struct kvm_mmu_memory_cache mmu_page_cache; |
352 | struct kvm_mmu_memory_cache mmu_page_header_cache; | 352 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
353 | 353 | ||
@@ -393,6 +393,15 @@ struct kvm_vcpu_arch { | |||
393 | unsigned int hw_tsc_khz; | 393 | unsigned int hw_tsc_khz; |
394 | unsigned int time_offset; | 394 | unsigned int time_offset; |
395 | struct page *time_page; | 395 | struct page *time_page; |
396 | |||
397 | struct { | ||
398 | u64 msr_val; | ||
399 | u64 last_steal; | ||
400 | u64 accum_steal; | ||
401 | struct gfn_to_hva_cache stime; | ||
402 | struct kvm_steal_time steal; | ||
403 | } st; | ||
404 | |||
396 | u64 last_guest_tsc; | 405 | u64 last_guest_tsc; |
397 | u64 last_kernel_ns; | 406 | u64 last_kernel_ns; |
398 | u64 last_tsc_nsec; | 407 | u64 last_tsc_nsec; |
@@ -419,6 +428,11 @@ struct kvm_vcpu_arch { | |||
419 | u64 mcg_ctl; | 428 | u64 mcg_ctl; |
420 | u64 *mce_banks; | 429 | u64 *mce_banks; |
421 | 430 | ||
431 | /* Cache MMIO info */ | ||
432 | u64 mmio_gva; | ||
433 | unsigned access; | ||
434 | gfn_t mmio_gfn; | ||
435 | |||
422 | /* used for guest single stepping over the given code position */ | 436 | /* used for guest single stepping over the given code position */ |
423 | unsigned long singlestep_rip; | 437 | unsigned long singlestep_rip; |
424 | 438 | ||
@@ -441,6 +455,7 @@ struct kvm_arch { | |||
441 | unsigned int n_used_mmu_pages; | 455 | unsigned int n_used_mmu_pages; |
442 | unsigned int n_requested_mmu_pages; | 456 | unsigned int n_requested_mmu_pages; |
443 | unsigned int n_max_mmu_pages; | 457 | unsigned int n_max_mmu_pages; |
458 | unsigned int indirect_shadow_pages; | ||
444 | atomic_t invlpg_counter; | 459 | atomic_t invlpg_counter; |
445 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 460 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
446 | /* | 461 | /* |
@@ -477,6 +492,8 @@ struct kvm_arch { | |||
477 | u64 hv_guest_os_id; | 492 | u64 hv_guest_os_id; |
478 | u64 hv_hypercall; | 493 | u64 hv_hypercall; |
479 | 494 | ||
495 | atomic_t reader_counter; | ||
496 | |||
480 | #ifdef CONFIG_KVM_MMU_AUDIT | 497 | #ifdef CONFIG_KVM_MMU_AUDIT |
481 | int audit_point; | 498 | int audit_point; |
482 | #endif | 499 | #endif |
@@ -559,7 +576,7 @@ struct kvm_x86_ops { | |||
559 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); | 576 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); |
560 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | 577 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
561 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 578 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
562 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 579 | int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
563 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 580 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
564 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 581 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
565 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 582 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
@@ -636,7 +653,6 @@ void kvm_mmu_module_exit(void); | |||
636 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | 653 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); |
637 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | 654 | int kvm_mmu_create(struct kvm_vcpu *vcpu); |
638 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); | 655 | int kvm_mmu_setup(struct kvm_vcpu *vcpu); |
639 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | ||
640 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 656 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
641 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 657 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
642 | 658 | ||
@@ -830,11 +846,12 @@ enum { | |||
830 | asmlinkage void kvm_spurious_fault(void); | 846 | asmlinkage void kvm_spurious_fault(void); |
831 | extern bool kvm_rebooting; | 847 | extern bool kvm_rebooting; |
832 | 848 | ||
833 | #define __kvm_handle_fault_on_reboot(insn) \ | 849 | #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ |
834 | "666: " insn "\n\t" \ | 850 | "666: " insn "\n\t" \ |
835 | "668: \n\t" \ | 851 | "668: \n\t" \ |
836 | ".pushsection .fixup, \"ax\" \n" \ | 852 | ".pushsection .fixup, \"ax\" \n" \ |
837 | "667: \n\t" \ | 853 | "667: \n\t" \ |
854 | cleanup_insn "\n\t" \ | ||
838 | "cmpb $0, kvm_rebooting \n\t" \ | 855 | "cmpb $0, kvm_rebooting \n\t" \ |
839 | "jne 668b \n\t" \ | 856 | "jne 668b \n\t" \ |
840 | __ASM_SIZE(push) " $666b \n\t" \ | 857 | __ASM_SIZE(push) " $666b \n\t" \ |
@@ -844,6 +861,9 @@ extern bool kvm_rebooting; | |||
844 | _ASM_PTR " 666b, 667b \n\t" \ | 861 | _ASM_PTR " 666b, 667b \n\t" \ |
845 | ".popsection" | 862 | ".popsection" |
846 | 863 | ||
864 | #define __kvm_handle_fault_on_reboot(insn) \ | ||
865 | ____kvm_handle_fault_on_reboot(insn, "") | ||
866 | |||
847 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 867 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
848 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 868 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
849 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 869 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index a427bf77a93d..734c3767cfac 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -21,6 +21,7 @@ | |||
21 | */ | 21 | */ |
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 |
23 | #define KVM_FEATURE_ASYNC_PF 4 | 23 | #define KVM_FEATURE_ASYNC_PF 4 |
24 | #define KVM_FEATURE_STEAL_TIME 5 | ||
24 | 25 | ||
25 | /* The last 8 bits are used to indicate how to interpret the flags field | 26 | /* The last 8 bits are used to indicate how to interpret the flags field |
26 | * in pvclock structure. If no bits are set, all flags are ignored. | 27 | * in pvclock structure. If no bits are set, all flags are ignored. |
@@ -30,10 +31,23 @@ | |||
30 | #define MSR_KVM_WALL_CLOCK 0x11 | 31 | #define MSR_KVM_WALL_CLOCK 0x11 |
31 | #define MSR_KVM_SYSTEM_TIME 0x12 | 32 | #define MSR_KVM_SYSTEM_TIME 0x12 |
32 | 33 | ||
34 | #define KVM_MSR_ENABLED 1 | ||
33 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | 35 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ |
34 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | 36 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 |
35 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | 37 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 |
36 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 | 38 | #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 |
39 | #define MSR_KVM_STEAL_TIME 0x4b564d03 | ||
40 | |||
41 | struct kvm_steal_time { | ||
42 | __u64 steal; | ||
43 | __u32 version; | ||
44 | __u32 flags; | ||
45 | __u32 pad[12]; | ||
46 | }; | ||
47 | |||
48 | #define KVM_STEAL_ALIGNMENT_BITS 5 | ||
49 | #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) | ||
50 | #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) | ||
37 | 51 | ||
38 | #define KVM_MAX_MMU_OP_BATCH 32 | 52 | #define KVM_MAX_MMU_OP_BATCH 32 |
39 | 53 | ||
@@ -178,6 +192,7 @@ void __init kvm_guest_init(void); | |||
178 | void kvm_async_pf_task_wait(u32 token); | 192 | void kvm_async_pf_task_wait(u32 token); |
179 | void kvm_async_pf_task_wake(u32 token); | 193 | void kvm_async_pf_task_wake(u32 token); |
180 | u32 kvm_read_and_reset_pf_reason(void); | 194 | u32 kvm_read_and_reset_pf_reason(void); |
195 | extern void kvm_disable_steal_time(void); | ||
181 | #else | 196 | #else |
182 | #define kvm_guest_init() do { } while (0) | 197 | #define kvm_guest_init() do { } while (0) |
183 | #define kvm_async_pf_task_wait(T) do {} while(0) | 198 | #define kvm_async_pf_task_wait(T) do {} while(0) |
@@ -186,6 +201,11 @@ static inline u32 kvm_read_and_reset_pf_reason(void) | |||
186 | { | 201 | { |
187 | return 0; | 202 | return 0; |
188 | } | 203 | } |
204 | |||
205 | static inline void kvm_disable_steal_time(void) | ||
206 | { | ||
207 | return; | ||
208 | } | ||
189 | #endif | 209 | #endif |
190 | 210 | ||
191 | #endif /* __KERNEL__ */ | 211 | #endif /* __KERNEL__ */ |
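MSR_KVM_STEAL_TIME takes the physical address of a 64-byte-aligned struct kvm_steal_time with bit 0 (KVM_MSR_ENABLED) set. A minimal guest-side sketch; the per-CPU variable and function names are hypothetical, and the write must run on the CPU being enabled:

    #include <linux/percpu.h>
    #include <linux/smp.h>
    #include <asm/kvm_para.h>
    #include <asm/msr.h>

    static DEFINE_PER_CPU(struct kvm_steal_time, steal_area) __aligned(64);

    static void enable_steal_time(void)
    {
            u64 pa;

            if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
                    return;
            pa = __pa(&per_cpu(steal_area, smp_processor_id()));
            wrmsrl(MSR_KVM_STEAL_TIME, pa | KVM_MSR_ENABLED);
    }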
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index b60f2924c413..879fd7d33877 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -61,6 +61,7 @@ hcall(unsigned long call, | |||
61 | : "memory"); | 61 | : "memory"); |
62 | return call; | 62 | return call; |
63 | } | 63 | } |
64 | /*:*/ | ||
64 | 65 | ||
65 | /* Can't use our min() macro here: needs to be a constant */ | 66 | /* Can't use our min() macro here: needs to be a constant */ |
66 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) | 67 | #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 021979a6e23f..716b48af7863 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -8,6 +8,7 @@ | |||
8 | * Machine Check support for x86 | 8 | * Machine Check support for x86 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* MCG_CAP register defines */ | ||
11 | #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ | 12 | #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ |
12 | #define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ | 13 | #define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ |
13 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ | 14 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ |
@@ -17,10 +18,12 @@ | |||
17 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) | 18 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) |
18 | #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ | 19 | #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ |
19 | 20 | ||
21 | /* MCG_STATUS register defines */ | ||
20 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ | 22 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ |
21 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ | 23 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ |
22 | #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ | 24 | #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ |
23 | 25 | ||
26 | /* MCi_STATUS register defines */ | ||
24 | #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ | 27 | #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ |
25 | #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ | 28 | #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ |
26 | #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ | 29 | #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ |
@@ -31,12 +34,14 @@ | |||
31 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | 34 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ |
32 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | 35 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ |
33 | 36 | ||
34 | /* MISC register defines */ | 37 | /* MCi_MISC register defines */ |
35 | #define MCM_ADDR_SEGOFF 0 /* segment offset */ | 38 | #define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) |
36 | #define MCM_ADDR_LINEAR 1 /* linear address */ | 39 | #define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) |
37 | #define MCM_ADDR_PHYS 2 /* physical address */ | 40 | #define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ |
38 | #define MCM_ADDR_MEM 3 /* memory address */ | 41 | #define MCI_MISC_ADDR_LINEAR 1 /* linear address */ |
39 | #define MCM_ADDR_GENERIC 7 /* generic */ | 42 | #define MCI_MISC_ADDR_PHYS 2 /* physical address */ |
43 | #define MCI_MISC_ADDR_MEM 3 /* memory address */ | ||
44 | #define MCI_MISC_ADDR_GENERIC 7 /* generic */ | ||
40 | 45 | ||
41 | /* CTL2 register defines */ | 46 | /* CTL2 register defines */ |
42 | #define MCI_CTL2_CMCI_EN (1ULL << 30) | 47 | #define MCI_CTL2_CMCI_EN (1ULL << 30) |
@@ -144,7 +149,7 @@ static inline void enable_p5_mce(void) {} | |||
144 | 149 | ||
145 | void mce_setup(struct mce *m); | 150 | void mce_setup(struct mce *m); |
146 | void mce_log(struct mce *m); | 151 | void mce_log(struct mce *m); |
147 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 152 | DECLARE_PER_CPU(struct sys_device, mce_sysdev); |
148 | 153 | ||
149 | /* | 154 | /* |
150 | * Maximum banks number. | 155 | * Maximum banks number. |
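The renamed MCi_MISC accessors make the recoverable-address fields explicit: bits 5:0 give the least significant valid address bit and bits 8:6 the address mode. A short decoding sketch with a hypothetical helper:

    static void report_misc(u64 misc)
    {
            unsigned int lsb  = MCI_MISC_ADDR_LSB(misc);
            unsigned int mode = MCI_MISC_ADDR_MODE(misc);

            if (mode == MCI_MISC_ADDR_PHYS)
                    pr_info("MCE address valid from bit %u up (physical)\n", lsb);
    }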
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 224e8c5eb307..55728e121473 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -34,15 +34,15 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {} | |||
34 | * 64Gb / 4096bytes/page = 16777216 pages | 34 | * 64Gb / 4096bytes/page = 16777216 pages |
35 | */ | 35 | */ |
36 | #define MAX_NR_PAGES 16777216 | 36 | #define MAX_NR_PAGES 16777216 |
37 | #define MAX_ELEMENTS 1024 | 37 | #define MAX_SECTIONS 1024 |
38 | #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS) | 38 | #define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) |
39 | 39 | ||
40 | extern s8 physnode_map[]; | 40 | extern s8 physnode_map[]; |
41 | 41 | ||
42 | static inline int pfn_to_nid(unsigned long pfn) | 42 | static inline int pfn_to_nid(unsigned long pfn) |
43 | { | 43 | { |
44 | #ifdef CONFIG_NUMA | 44 | #ifdef CONFIG_NUMA |
45 | return((int) physnode_map[(pfn) / PAGES_PER_ELEMENT]); | 45 | return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); |
46 | #else | 46 | #else |
47 | return 0; | 47 | return 0; |
48 | #endif | 48 | #endif |
@@ -57,6 +57,8 @@ static inline int pfn_valid(int pfn) | |||
57 | return 0; | 57 | return 0; |
58 | } | 58 | } |
59 | 59 | ||
60 | #define early_pfn_valid(pfn) pfn_valid((pfn)) | ||
61 | |||
60 | #endif /* CONFIG_DISCONTIGMEM */ | 62 | #endif /* CONFIG_DISCONTIGMEM */ |
61 | 63 | ||
62 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 64 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 485b4f1f079b..d52609aeeab8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -259,6 +259,9 @@ | |||
259 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | 259 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 |
260 | 260 | ||
261 | #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 | 261 | #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 |
262 | #define ENERGY_PERF_BIAS_PERFORMANCE 0 | ||
263 | #define ENERGY_PERF_BIAS_NORMAL 6 | ||
264 | #define ENERGY_PERF_BIAS_POWERSAVE 15 | ||
262 | 265 | ||
263 | #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 | 266 | #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 |
264 | 267 | ||
@@ -438,6 +441,18 @@ | |||
438 | #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a | 441 | #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a |
439 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b | 442 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b |
440 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c | 443 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c |
444 | #define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d | ||
445 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | ||
446 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | ||
447 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | ||
448 | |||
449 | /* VMX_BASIC bits and bitmasks */ | ||
450 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | ||
451 | #define VMX_BASIC_64 0x0001000000000000LLU | ||
452 | #define VMX_BASIC_MEM_TYPE_SHIFT 50 | ||
453 | #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU | ||
454 | #define VMX_BASIC_MEM_TYPE_WB 6LLU | ||
455 | #define VMX_BASIC_INOUT 0x0040000000000000LLU | ||
441 | 456 | ||
442 | /* AMD-V MSRs */ | 457 | /* AMD-V MSRs */ |
443 | 458 | ||
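ENERGY_PERF_BIAS_* name the 0 (performance) to 15 (powersave) hint values for MSR_IA32_ENERGY_PERF_BIAS. A sketch of programming the hint, assuming the field occupies bits 3:0 of the MSR; the function name is hypothetical:

    #include <asm/msr.h>

    static void set_epb_normal(void)
    {
            u64 epb;

            rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
            epb = (epb & ~0xfULL) | ENERGY_PERF_BIAS_NORMAL;
            wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
    }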
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ebbc4d8ab170..a7d2db9a74fb 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -230,6 +230,15 @@ static inline unsigned long long paravirt_sched_clock(void) | |||
230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
231 | } | 231 | } |
232 | 232 | ||
233 | struct jump_label_key; | ||
234 | extern struct jump_label_key paravirt_steal_enabled; | ||
235 | extern struct jump_label_key paravirt_steal_rq_enabled; | ||
236 | |||
237 | static inline u64 paravirt_steal_clock(int cpu) | ||
238 | { | ||
239 | return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu); | ||
240 | } | ||
241 | |||
233 | static inline unsigned long long paravirt_read_pmc(int counter) | 242 | static inline unsigned long long paravirt_read_pmc(int counter) |
234 | { | 243 | { |
235 | return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); | 244 | return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); |
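paravirt_steal_clock() is a PVOP call, and the two jump-label keys let consumers skip it entirely unless a hypervisor registered a steal_clock hook. A sketch of a guarded consumer, assuming static_branch() is the jump-label test helper of this kernel generation; the function itself is hypothetical:

    #include <linux/jump_label.h>
    #include <asm/paravirt.h>

    static u64 sample_steal_time(int cpu)
    {
            if (static_branch(&paravirt_steal_enabled))
                    return paravirt_steal_clock(cpu);
            return 0;
    }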
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 82885099c869..2c7652163111 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -89,6 +89,7 @@ struct pv_lazy_ops { | |||
89 | 89 | ||
90 | struct pv_time_ops { | 90 | struct pv_time_ops { |
91 | unsigned long long (*sched_clock)(void); | 91 | unsigned long long (*sched_clock)(void); |
92 | unsigned long long (*steal_clock)(int cpu); | ||
92 | unsigned long (*get_tsc_khz)(void); | 93 | unsigned long (*get_tsc_khz)(void); |
93 | }; | 94 | }; |
94 | 95 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a0a9779084d1..3470c9d0ebba 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -388,12 +388,9 @@ do { \ | |||
388 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) | 388 | #define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) |
389 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) | 389 | #define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) |
390 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) | 390 | #define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) |
391 | /* | 391 | #define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) |
392 | * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much | 392 | #define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) |
393 | * faster than an xchg with forced lock semantics. | 393 | #define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) |
394 | */ | ||
395 | #define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
396 | #define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
397 | 394 | ||
398 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 395 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
399 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 396 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
@@ -485,6 +482,8 @@ do { \ | |||
485 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 482 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
486 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 483 | #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
487 | #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | 484 | #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) |
485 | #define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
486 | #define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
488 | 487 | ||
489 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 488 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
490 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 489 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
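With __this_cpu_xchg_{1,2,4} mapped to percpu_xchg_op(), small per-CPU exchanges avoid the implicit LOCK of a plain xchg (the point the removed comment was making). A minimal usage sketch; the per-CPU variable and helper are hypothetical and the caller is assumed to have preemption disabled:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(unsigned int, pending_flag);

    static bool test_and_clear_pending(void)
    {
            return __this_cpu_xchg(pending_flag, 0) != 0;
    }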
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index d9d4dae305f6..094fb30817ab 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); | |||
152 | (regs)->bp = caller_frame_pointer(); \ | 152 | (regs)->bp = caller_frame_pointer(); \ |
153 | (regs)->cs = __KERNEL_CS; \ | 153 | (regs)->cs = __KERNEL_CS; \ |
154 | regs->flags = 0; \ | 154 | regs->flags = 0; \ |
155 | asm volatile( \ | ||
156 | _ASM_MOV "%%"_ASM_SP ", %0\n" \ | ||
157 | : "=m" ((regs)->sp) \ | ||
158 | :: "memory" \ | ||
159 | ); \ | ||
155 | } | 160 | } |
156 | 161 | ||
157 | #else | 162 | #else |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 56fd9e3abbda..4f7e67e2345e 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -102,6 +102,14 @@ | |||
102 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | 102 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) |
103 | 103 | ||
104 | /* | 104 | /* |
105 | * If an event has an alias it should be marked | ||
106 | * with a special bit. (Don't forget to check | ||
107 | * P4_PEBS_CONFIG_MASK and related bits on | ||
108 | * modification.) | ||
109 | */ | ||
110 | #define P4_CONFIG_ALIASABLE (1 << 9) | ||
111 | |||
112 | /* | ||
105 | * The bits we allow to pass for RAW events | 113 | * The bits we allow to pass for RAW events |
106 | */ | 114 | */ |
107 | #define P4_CONFIG_MASK_ESCR \ | 115 | #define P4_CONFIG_MASK_ESCR \ |
@@ -123,6 +131,31 @@ | |||
123 | (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ | 131 | (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ |
124 | (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) | 132 | (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) |
125 | 133 | ||
134 | /* | ||
135 | * In case of event aliasing we need to preserve some | ||
136 | * caller bits, otherwise the mapping won't be complete. | ||
137 | */ | ||
138 | #define P4_CONFIG_EVENT_ALIAS_MASK \ | ||
139 | (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \ | ||
140 | p4_config_pack_cccr(P4_CCCR_EDGE | \ | ||
141 | P4_CCCR_THRESHOLD_MASK | \ | ||
142 | P4_CCCR_COMPLEMENT | \ | ||
143 | P4_CCCR_COMPARE)) | ||
144 | |||
145 | #define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \ | ||
146 | ((P4_CONFIG_HT) | \ | ||
147 | p4_config_pack_escr(P4_ESCR_T0_OS | \ | ||
148 | P4_ESCR_T0_USR | \ | ||
149 | P4_ESCR_T1_OS | \ | ||
150 | P4_ESCR_T1_USR) | \ | ||
151 | p4_config_pack_cccr(P4_CCCR_OVF | \ | ||
152 | P4_CCCR_CASCADE | \ | ||
153 | P4_CCCR_FORCE_OVF | \ | ||
154 | P4_CCCR_THREAD_ANY | \ | ||
155 | P4_CCCR_OVF_PMI_T0 | \ | ||
156 | P4_CCCR_OVF_PMI_T1 | \ | ||
157 | P4_CONFIG_ALIASABLE)) | ||
158 | |||
126 | static inline bool p4_is_event_cascaded(u64 config) | 159 | static inline bool p4_is_event_cascaded(u64 config) |
127 | { | 160 | { |
128 | u32 cccr = p4_config_unpack_cccr(config); | 161 | u32 cccr = p4_config_unpack_cccr(config); |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d56187c6b838..013286a10c2c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -107,7 +107,8 @@ | |||
107 | #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) | 107 | #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) |
108 | #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) | 108 | #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) |
109 | #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) | 109 | #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) |
110 | #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) | 110 | #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) |
111 | #define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT) | ||
111 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) | 112 | #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) |
112 | #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) | 113 | #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) |
113 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) | 114 | #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) |
@@ -129,7 +130,8 @@ | |||
129 | #define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) | 130 | #define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) |
130 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) | 131 | #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) |
131 | #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) | 132 | #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) |
132 | #define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) | 133 | #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR) |
134 | #define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE) | ||
133 | 135 | ||
134 | #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) | 136 | #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) |
135 | #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) | 137 | #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) |
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 59ab4dffa377..2dddb317bb39 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -59,6 +59,7 @@ | |||
59 | #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ | 59 | #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ |
60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ | 60 | #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ |
61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ | 61 | #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ |
62 | #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ | ||
62 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ | 63 | #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ |
63 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ | 64 | #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ |
64 | 65 | ||
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 971e0b46446e..df1287019e6d 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h | |||
@@ -30,17 +30,6 @@ extern void add_dtb(u64 data); | |||
30 | extern void x86_add_irq_domains(void); | 30 | extern void x86_add_irq_domains(void); |
31 | void __cpuinit x86_of_pci_init(void); | 31 | void __cpuinit x86_of_pci_init(void); |
32 | void x86_dtb_init(void); | 32 | void x86_dtb_init(void); |
33 | |||
34 | static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev) | ||
35 | { | ||
36 | return pdev ? pdev->dev.of_node : NULL; | ||
37 | } | ||
38 | |||
39 | static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus) | ||
40 | { | ||
41 | return pci_device_to_OF_node(bus->self); | ||
42 | } | ||
43 | |||
44 | #else | 33 | #else |
45 | static inline void add_dtb(u64 data) { } | 34 | static inline void add_dtb(u64 data) { } |
46 | static inline void x86_add_irq_domains(void) { } | 35 | static inline void x86_add_irq_domains(void) { } |
diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h index 6a8c0d645108..a5370a03d90c 100644 --- a/arch/x86/include/asm/rwlock.h +++ b/arch/x86/include/asm/rwlock.h | |||
@@ -1,7 +1,48 @@ | |||
1 | #ifndef _ASM_X86_RWLOCK_H | 1 | #ifndef _ASM_X86_RWLOCK_H |
2 | #define _ASM_X86_RWLOCK_H | 2 | #define _ASM_X86_RWLOCK_H |
3 | 3 | ||
4 | #define RW_LOCK_BIAS 0x01000000 | 4 | #include <asm/asm.h> |
5 | |||
6 | #if CONFIG_NR_CPUS <= 2048 | ||
7 | |||
8 | #ifndef __ASSEMBLY__ | ||
9 | typedef union { | ||
10 | s32 lock; | ||
11 | s32 write; | ||
12 | } arch_rwlock_t; | ||
13 | #endif | ||
14 | |||
15 | #define RW_LOCK_BIAS 0x00100000 | ||
16 | #define READ_LOCK_SIZE(insn) __ASM_FORM(insn##l) | ||
17 | #define READ_LOCK_ATOMIC(n) atomic_##n | ||
18 | #define WRITE_LOCK_ADD(n) __ASM_FORM_COMMA(addl n) | ||
19 | #define WRITE_LOCK_SUB(n) __ASM_FORM_COMMA(subl n) | ||
20 | #define WRITE_LOCK_CMP RW_LOCK_BIAS | ||
21 | |||
22 | #else /* CONFIG_NR_CPUS > 2048 */ | ||
23 | |||
24 | #include <linux/const.h> | ||
25 | |||
26 | #ifndef __ASSEMBLY__ | ||
27 | typedef union { | ||
28 | s64 lock; | ||
29 | struct { | ||
30 | u32 read; | ||
31 | s32 write; | ||
32 | }; | ||
33 | } arch_rwlock_t; | ||
34 | #endif | ||
35 | |||
36 | #define RW_LOCK_BIAS (_AC(1,L) << 32) | ||
37 | #define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q) | ||
38 | #define READ_LOCK_ATOMIC(n) atomic64_##n | ||
39 | #define WRITE_LOCK_ADD(n) __ASM_FORM(incl) | ||
40 | #define WRITE_LOCK_SUB(n) __ASM_FORM(decl) | ||
41 | #define WRITE_LOCK_CMP 1 | ||
42 | |||
43 | #endif /* CONFIG_NR_CPUS */ | ||
44 | |||
45 | #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } | ||
5 | 46 | ||
6 | /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ | 47 | /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ |
7 | 48 | ||
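The rwlock.h change above splits the lock layout by CONFIG_NR_CPUS: up to 2048 CPUs the lock stays a single 32-bit word biased to RW_LOCK_BIAS, with each reader taking one unit and a writer taking the whole bias, while beyond 2048 CPUs readers count in the full 64-bit word and the writer flag becomes a separate 32-bit field with a bias of 1. A minimal user-space sketch of the biased counting scheme for the <= 2048 case (illustration only, not the kernel code; the demo_* names and the C11 atomics are assumptions):

#include <stdatomic.h>
#include <stdbool.h>

#define RW_LOCK_BIAS 0x00100000

static atomic_int demo_lock = RW_LOCK_BIAS;

/* A reader takes one unit; it fails only if a writer already took the bias. */
static bool demo_read_trylock(void)
{
	if (atomic_fetch_sub(&demo_lock, 1) - 1 >= 0)
		return true;
	atomic_fetch_add(&demo_lock, 1);	/* undo: a writer holds the lock */
	return false;
}

/* A writer takes the whole bias, so it only succeeds with no readers and no writer. */
static bool demo_write_trylock(void)
{
	int expected = RW_LOCK_BIAS;
	return atomic_compare_exchange_strong(&demo_lock, &expected, 0);
}

static void demo_read_unlock(void)  { atomic_fetch_add(&demo_lock, 1); }
static void demo_write_unlock(void) { atomic_fetch_add(&demo_lock, RW_LOCK_BIAS); }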
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index cd84f7208f76..5e641715c3fe 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h | |||
@@ -162,7 +162,7 @@ | |||
162 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 | 162 | #define GDT_ENTRY_DEFAULT_USER32_CS 4 |
163 | #define GDT_ENTRY_DEFAULT_USER_DS 5 | 163 | #define GDT_ENTRY_DEFAULT_USER_DS 5 |
164 | #define GDT_ENTRY_DEFAULT_USER_CS 6 | 164 | #define GDT_ENTRY_DEFAULT_USER_CS 6 |
165 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) | 165 | #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) |
166 | #define __USER32_DS __USER_DS | 166 | #define __USER32_DS __USER_DS |
167 | 167 | ||
168 | #define GDT_ENTRY_TSS 8 /* needs two entries */ | 168 | #define GDT_ENTRY_TSS 8 /* needs two entries */ |
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 725b77831993..49adfd7bb4a4 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h | |||
@@ -10,7 +10,11 @@ static inline void smpboot_clear_io_apic_irqs(void) | |||
10 | 10 | ||
11 | static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | 11 | static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) |
12 | { | 12 | { |
13 | unsigned long flags; | ||
14 | |||
15 | spin_lock_irqsave(&rtc_lock, flags); | ||
13 | CMOS_WRITE(0xa, 0xf); | 16 | CMOS_WRITE(0xa, 0xf); |
17 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
14 | local_flush_tlb(); | 18 | local_flush_tlb(); |
15 | pr_debug("1.\n"); | 19 | pr_debug("1.\n"); |
16 | *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = | 20 | *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = |
@@ -23,6 +27,8 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | |||
23 | 27 | ||
24 | static inline void smpboot_restore_warm_reset_vector(void) | 28 | static inline void smpboot_restore_warm_reset_vector(void) |
25 | { | 29 | { |
30 | unsigned long flags; | ||
31 | |||
26 | /* | 32 | /* |
27 | * Install writable page 0 entry to set BIOS data area. | 33 | * Install writable page 0 entry to set BIOS data area. |
28 | */ | 34 | */ |
@@ -32,7 +38,9 @@ static inline void smpboot_restore_warm_reset_vector(void) | |||
32 | * Paranoid: Set warm reset code and vector here back | 38 | * Paranoid: Set warm reset code and vector here back |
33 | * to default values. | 39 | * to default values. |
34 | */ | 40 | */ |
41 | spin_lock_irqsave(&rtc_lock, flags); | ||
35 | CMOS_WRITE(0, 0xf); | 42 | CMOS_WRITE(0, 0xf); |
43 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
36 | 44 | ||
37 | *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0; | 45 | *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0; |
38 | } | 46 | } |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 3089f70c0c52..e9e51f710e6c 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _ASM_X86_SPINLOCK_H | 2 | #define _ASM_X86_SPINLOCK_H |
3 | 3 | ||
4 | #include <asm/atomic.h> | 4 | #include <asm/atomic.h> |
5 | #include <asm/rwlock.h> | ||
6 | #include <asm/page.h> | 5 | #include <asm/page.h> |
7 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
8 | #include <linux/compiler.h> | 7 | #include <linux/compiler.h> |
@@ -234,7 +233,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) | |||
234 | */ | 233 | */ |
235 | static inline int arch_read_can_lock(arch_rwlock_t *lock) | 234 | static inline int arch_read_can_lock(arch_rwlock_t *lock) |
236 | { | 235 | { |
237 | return (int)(lock)->lock > 0; | 236 | return lock->lock > 0; |
238 | } | 237 | } |
239 | 238 | ||
240 | /** | 239 | /** |
@@ -243,12 +242,12 @@ static inline int arch_read_can_lock(arch_rwlock_t *lock) | |||
243 | */ | 242 | */ |
244 | static inline int arch_write_can_lock(arch_rwlock_t *lock) | 243 | static inline int arch_write_can_lock(arch_rwlock_t *lock) |
245 | { | 244 | { |
246 | return (lock)->lock == RW_LOCK_BIAS; | 245 | return lock->write == WRITE_LOCK_CMP; |
247 | } | 246 | } |
248 | 247 | ||
249 | static inline void arch_read_lock(arch_rwlock_t *rw) | 248 | static inline void arch_read_lock(arch_rwlock_t *rw) |
250 | { | 249 | { |
251 | asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" | 250 | asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" |
252 | "jns 1f\n" | 251 | "jns 1f\n" |
253 | "call __read_lock_failed\n\t" | 252 | "call __read_lock_failed\n\t" |
254 | "1:\n" | 253 | "1:\n" |
@@ -257,47 +256,55 @@ static inline void arch_read_lock(arch_rwlock_t *rw) | |||
257 | 256 | ||
258 | static inline void arch_write_lock(arch_rwlock_t *rw) | 257 | static inline void arch_write_lock(arch_rwlock_t *rw) |
259 | { | 258 | { |
260 | asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" | 259 | asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" |
261 | "jz 1f\n" | 260 | "jz 1f\n" |
262 | "call __write_lock_failed\n\t" | 261 | "call __write_lock_failed\n\t" |
263 | "1:\n" | 262 | "1:\n" |
264 | ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); | 263 | ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS) |
264 | : "memory"); | ||
265 | } | 265 | } |
266 | 266 | ||
267 | static inline int arch_read_trylock(arch_rwlock_t *lock) | 267 | static inline int arch_read_trylock(arch_rwlock_t *lock) |
268 | { | 268 | { |
269 | atomic_t *count = (atomic_t *)lock; | 269 | READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock; |
270 | 270 | ||
271 | if (atomic_dec_return(count) >= 0) | 271 | if (READ_LOCK_ATOMIC(dec_return)(count) >= 0) |
272 | return 1; | 272 | return 1; |
273 | atomic_inc(count); | 273 | READ_LOCK_ATOMIC(inc)(count); |
274 | return 0; | 274 | return 0; |
275 | } | 275 | } |
276 | 276 | ||
277 | static inline int arch_write_trylock(arch_rwlock_t *lock) | 277 | static inline int arch_write_trylock(arch_rwlock_t *lock) |
278 | { | 278 | { |
279 | atomic_t *count = (atomic_t *)lock; | 279 | atomic_t *count = (atomic_t *)&lock->write; |
280 | 280 | ||
281 | if (atomic_sub_and_test(RW_LOCK_BIAS, count)) | 281 | if (atomic_sub_and_test(WRITE_LOCK_CMP, count)) |
282 | return 1; | 282 | return 1; |
283 | atomic_add(RW_LOCK_BIAS, count); | 283 | atomic_add(WRITE_LOCK_CMP, count); |
284 | return 0; | 284 | return 0; |
285 | } | 285 | } |
286 | 286 | ||
287 | static inline void arch_read_unlock(arch_rwlock_t *rw) | 287 | static inline void arch_read_unlock(arch_rwlock_t *rw) |
288 | { | 288 | { |
289 | asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); | 289 | asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" |
290 | :"+m" (rw->lock) : : "memory"); | ||
290 | } | 291 | } |
291 | 292 | ||
292 | static inline void arch_write_unlock(arch_rwlock_t *rw) | 293 | static inline void arch_write_unlock(arch_rwlock_t *rw) |
293 | { | 294 | { |
294 | asm volatile(LOCK_PREFIX "addl %1, %0" | 295 | asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" |
295 | : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); | 296 | : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); |
296 | } | 297 | } |
297 | 298 | ||
298 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) | 299 | #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) |
299 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) | 300 | #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) |
300 | 301 | ||
302 | #undef READ_LOCK_SIZE | ||
303 | #undef READ_LOCK_ATOMIC | ||
304 | #undef WRITE_LOCK_ADD | ||
305 | #undef WRITE_LOCK_SUB | ||
306 | #undef WRITE_LOCK_CMP | ||
307 | |||
301 | #define arch_spin_relax(lock) cpu_relax() | 308 | #define arch_spin_relax(lock) cpu_relax() |
302 | #define arch_read_relax(lock) cpu_relax() | 309 | #define arch_read_relax(lock) cpu_relax() |
303 | #define arch_write_relax(lock) cpu_relax() | 310 | #define arch_write_relax(lock) cpu_relax() |
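For reference, with CONFIG_NR_CPUS <= 2048 the size-parameterized fast paths above expand roughly as follows (assuming LOCK_PREFIX emits a plain lock prefix on SMP):

	arch_read_lock:     lock; decl (%0)      /* READ_LOCK_SIZE(dec)  */
	arch_read_unlock:   lock; incl %0        /* READ_LOCK_SIZE(inc)  */
	arch_write_lock:    lock; subl %1, (%0)  /* WRITE_LOCK_SUB(%1)   */
	arch_write_unlock:  lock; addl %1, %0    /* WRITE_LOCK_ADD(%1)   */

With CONFIG_NR_CPUS > 2048 the read side switches to decq/incq on the 64-bit lock word, while the write side ignores its operand and uses decl/incl on the 32-bit ->write field.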
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index dcb48b2edc11..7c7a486fcb68 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h | |||
@@ -11,10 +11,6 @@ typedef struct arch_spinlock { | |||
11 | 11 | ||
12 | #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } | 12 | #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } |
13 | 13 | ||
14 | typedef struct { | 14 | #include <asm/rwlock.h> |
15 | unsigned int lock; | ||
16 | } arch_rwlock_t; | ||
17 | |||
18 | #define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } | ||
19 | 15 | ||
20 | #endif /* _ASM_X86_SPINLOCK_TYPES_H */ | 16 | #endif /* _ASM_X86_SPINLOCK_TYPES_H */ |
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 7bdec4e9b739..92b8aec06970 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h | |||
@@ -1,10 +1,12 @@ | |||
1 | #ifndef _ASM_X86_TIME_H | 1 | #ifndef _ASM_X86_TIME_H |
2 | #define _ASM_X86_TIME_H | 2 | #define _ASM_X86_TIME_H |
3 | 3 | ||
4 | extern void hpet_time_init(void); | 4 | #include <linux/clocksource.h> |
5 | |||
6 | #include <asm/mc146818rtc.h> | 5 | #include <asm/mc146818rtc.h> |
7 | 6 | ||
7 | extern void hpet_time_init(void); | ||
8 | extern void time_init(void); | 8 | extern void time_init(void); |
9 | 9 | ||
10 | extern struct clock_event_device *global_clock_event; | ||
11 | |||
10 | #endif /* _ASM_X86_TIME_H */ | 12 | #endif /* _ASM_X86_TIME_H */ |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0310da67307f..2bae0a513b40 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_TRAPS_H | 1 | #ifndef _ASM_X86_TRAPS_H |
2 | #define _ASM_X86_TRAPS_H | 2 | #define _ASM_X86_TRAPS_H |
3 | 3 | ||
4 | #include <linux/kprobes.h> | ||
5 | |||
4 | #include <asm/debugreg.h> | 6 | #include <asm/debugreg.h> |
5 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ | 7 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ |
6 | 8 | ||
@@ -38,6 +40,7 @@ asmlinkage void alignment_check(void); | |||
38 | asmlinkage void machine_check(void); | 40 | asmlinkage void machine_check(void); |
39 | #endif /* CONFIG_X86_MCE */ | 41 | #endif /* CONFIG_X86_MCE */ |
40 | asmlinkage void simd_coprocessor_error(void); | 42 | asmlinkage void simd_coprocessor_error(void); |
43 | asmlinkage void emulate_vsyscall(void); | ||
41 | 44 | ||
42 | dotraplinkage void do_divide_error(struct pt_regs *, long); | 45 | dotraplinkage void do_divide_error(struct pt_regs *, long); |
43 | dotraplinkage void do_debug(struct pt_regs *, long); | 46 | dotraplinkage void do_debug(struct pt_regs *, long); |
@@ -64,6 +67,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long); | |||
64 | dotraplinkage void do_machine_check(struct pt_regs *, long); | 67 | dotraplinkage void do_machine_check(struct pt_regs *, long); |
65 | #endif | 68 | #endif |
66 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); | 69 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); |
70 | dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long); | ||
67 | #ifdef CONFIG_X86_32 | 71 | #ifdef CONFIG_X86_32 |
68 | dotraplinkage void do_iret_error(struct pt_regs *, long); | 72 | dotraplinkage void do_iret_error(struct pt_regs *, long); |
69 | #endif | 73 | #endif |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9db5583b6d38..83e2efd181e2 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void); | |||
51 | extern int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern unsigned long native_calibrate_tsc(void); | 52 | extern unsigned long native_calibrate_tsc(void); |
53 | 53 | ||
54 | #ifdef CONFIG_X86_64 | ||
55 | extern cycles_t vread_tsc(void); | ||
56 | #endif | ||
57 | |||
58 | /* | 54 | /* |
59 | * Boot-time check whether the TSCs are synchronized across | 55 | * Boot-time check whether the TSCs are synchronized across |
60 | * all CPUs/cores: | 56 | * all CPUs/cores: |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 99ddd148a760..36361bf6fdd1 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; }; | |||
555 | 555 | ||
556 | #endif /* CONFIG_X86_WP_WORKS_OK */ | 556 | #endif /* CONFIG_X86_WP_WORKS_OK */ |
557 | 557 | ||
558 | extern unsigned long | ||
559 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n); | ||
560 | |||
558 | /* | 561 | /* |
559 | * movsl can be slow when source and dest are not both 8-byte aligned | 562 | * movsl can be slow when source and dest are not both 8-byte aligned |
560 | */ | 563 | */ |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index a291c40efd43..37d369859c8e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -67,7 +67,7 @@ | |||
67 | * we're using 655us, similar to UV1: 65 units of 10us | 67 | * we're using 655us, similar to UV1: 65 units of 10us |
68 | */ | 68 | */ |
69 | #define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) | 69 | #define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) |
70 | #define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (65*10UL) | 70 | #define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL) |
71 | 71 | ||
72 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \ | 72 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \ |
73 | UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ | 73 | UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ |
@@ -106,12 +106,20 @@ | |||
106 | #define DS_SOURCE_TIMEOUT 3 | 106 | #define DS_SOURCE_TIMEOUT 3 |
107 | /* | 107 | /* |
108 | * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 | 108 | * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 |
109 | * values 1 and 5 will not occur | 109 | * values 1 and 3 will not occur |
110 | * Decoded meaning ERROR BUSY AUX ERR | ||
111 | * ------------------------------- ---- ----- ------- | ||
112 | * IDLE 0 0 0 | ||
113 | * BUSY (active) 0 1 0 | ||
114 | * SW Ack Timeout (destination) 1 0 0 | ||
115 | * SW Ack INTD rejected (strong NACK) 1 0 1 | ||
116 | * Source Side Time Out Detected 1 1 0 | ||
117 | * Destination Side PUT Failed 1 1 1 | ||
110 | */ | 118 | */ |
111 | #define UV2H_DESC_IDLE 0 | 119 | #define UV2H_DESC_IDLE 0 |
112 | #define UV2H_DESC_DEST_TIMEOUT 2 | 120 | #define UV2H_DESC_BUSY 2 |
113 | #define UV2H_DESC_DEST_STRONG_NACK 3 | 121 | #define UV2H_DESC_DEST_TIMEOUT 4 |
114 | #define UV2H_DESC_BUSY 4 | 122 | #define UV2H_DESC_DEST_STRONG_NACK 5 |
115 | #define UV2H_DESC_SOURCE_TIMEOUT 6 | 123 | #define UV2H_DESC_SOURCE_TIMEOUT 6 |
116 | #define UV2H_DESC_DEST_PUT_ERR 7 | 124 | #define UV2H_DESC_DEST_PUT_ERR 7 |
117 | 125 | ||
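The table added above encodes the combined activation status as ERROR (bit 2), BUSY (bit 1) and AUX ERR (bit 0), which is exactly what the renumbered UV2H_DESC_* values spell out. An illustrative decoder (a sketch for clarity only; the helper name is hypothetical, not part of the patch):

static const char *uv2_status_name(unsigned long s)
{
	switch (s) {			/* s = ERROR << 2 | BUSY << 1 | AUX_ERR */
	case 0:  return "IDLE";
	case 2:  return "BUSY (active)";
	case 4:  return "SW Ack Timeout (destination)";
	case 5:  return "SW Ack INTD rejected (strong NACK)";
	case 6:  return "Source Side Time Out Detected";
	case 7:  return "Destination Side PUT Failed";
	default: return "unused (values 1 and 3 do not occur)";
	}
}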
@@ -183,7 +191,7 @@ | |||
183 | * 'base_dest_nasid' field of the header corresponds to the | 191 | * 'base_dest_nasid' field of the header corresponds to the |
184 | * destination nodeID associated with that specified bit. | 192 | * destination nodeID associated with that specified bit. |
185 | */ | 193 | */ |
186 | struct bau_targ_hubmask { | 194 | struct pnmask { |
187 | unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; | 195 | unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; |
188 | }; | 196 | }; |
189 | 197 | ||
@@ -314,7 +322,7 @@ struct bau_msg_header { | |||
314 | * Should be 64 bytes | 322 | * Should be 64 bytes |
315 | */ | 323 | */ |
316 | struct bau_desc { | 324 | struct bau_desc { |
317 | struct bau_targ_hubmask distribution; | 325 | struct pnmask distribution; |
318 | /* | 326 | /* |
319 | * message template, consisting of header and payload: | 327 | * message template, consisting of header and payload: |
320 | */ | 328 | */ |
@@ -488,6 +496,7 @@ struct bau_control { | |||
488 | struct bau_control *uvhub_master; | 496 | struct bau_control *uvhub_master; |
489 | struct bau_control *socket_master; | 497 | struct bau_control *socket_master; |
490 | struct ptc_stats *statp; | 498 | struct ptc_stats *statp; |
499 | cpumask_t *cpumask; | ||
491 | unsigned long timeout_interval; | 500 | unsigned long timeout_interval; |
492 | unsigned long set_bau_on_time; | 501 | unsigned long set_bau_on_time; |
493 | atomic_t active_descriptor_count; | 502 | atomic_t active_descriptor_count; |
@@ -526,90 +535,90 @@ struct bau_control { | |||
526 | struct hub_and_pnode *thp; | 535 | struct hub_and_pnode *thp; |
527 | }; | 536 | }; |
528 | 537 | ||
529 | static unsigned long read_mmr_uv2_status(void) | 538 | static inline unsigned long read_mmr_uv2_status(void) |
530 | { | 539 | { |
531 | return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2); | 540 | return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2); |
532 | } | 541 | } |
533 | 542 | ||
534 | static void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) | 543 | static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) |
535 | { | 544 | { |
536 | write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); | 545 | write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); |
537 | } | 546 | } |
538 | 547 | ||
539 | static void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) | 548 | static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) |
540 | { | 549 | { |
541 | write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); | 550 | write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); |
542 | } | 551 | } |
543 | 552 | ||
544 | static void write_mmr_activation(unsigned long index) | 553 | static inline void write_mmr_activation(unsigned long index) |
545 | { | 554 | { |
546 | write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 555 | write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
547 | } | 556 | } |
548 | 557 | ||
549 | static void write_gmmr_activation(int pnode, unsigned long mmr_image) | 558 | static inline void write_gmmr_activation(int pnode, unsigned long mmr_image) |
550 | { | 559 | { |
551 | write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); | 560 | write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); |
552 | } | 561 | } |
553 | 562 | ||
554 | static void write_mmr_payload_first(int pnode, unsigned long mmr_image) | 563 | static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image) |
555 | { | 564 | { |
556 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); | 565 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); |
557 | } | 566 | } |
558 | 567 | ||
559 | static void write_mmr_payload_tail(int pnode, unsigned long mmr_image) | 568 | static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image) |
560 | { | 569 | { |
561 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); | 570 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); |
562 | } | 571 | } |
563 | 572 | ||
564 | static void write_mmr_payload_last(int pnode, unsigned long mmr_image) | 573 | static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image) |
565 | { | 574 | { |
566 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); | 575 | write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); |
567 | } | 576 | } |
568 | 577 | ||
569 | static void write_mmr_misc_control(int pnode, unsigned long mmr_image) | 578 | static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image) |
570 | { | 579 | { |
571 | write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 580 | write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
572 | } | 581 | } |
573 | 582 | ||
574 | static unsigned long read_mmr_misc_control(int pnode) | 583 | static inline unsigned long read_mmr_misc_control(int pnode) |
575 | { | 584 | { |
576 | return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); | 585 | return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); |
577 | } | 586 | } |
578 | 587 | ||
579 | static void write_mmr_sw_ack(unsigned long mr) | 588 | static inline void write_mmr_sw_ack(unsigned long mr) |
580 | { | 589 | { |
581 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); | 590 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); |
582 | } | 591 | } |
583 | 592 | ||
584 | static unsigned long read_mmr_sw_ack(void) | 593 | static inline unsigned long read_mmr_sw_ack(void) |
585 | { | 594 | { |
586 | return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | 595 | return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); |
587 | } | 596 | } |
588 | 597 | ||
589 | static unsigned long read_gmmr_sw_ack(int pnode) | 598 | static inline unsigned long read_gmmr_sw_ack(int pnode) |
590 | { | 599 | { |
591 | return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | 600 | return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); |
592 | } | 601 | } |
593 | 602 | ||
594 | static void write_mmr_data_config(int pnode, unsigned long mr) | 603 | static inline void write_mmr_data_config(int pnode, unsigned long mr) |
595 | { | 604 | { |
596 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); | 605 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr); |
597 | } | 606 | } |
598 | 607 | ||
599 | static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp) | 608 | static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp) |
600 | { | 609 | { |
601 | return constant_test_bit(uvhub, &dstp->bits[0]); | 610 | return constant_test_bit(uvhub, &dstp->bits[0]); |
602 | } | 611 | } |
603 | static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp) | 612 | static inline void bau_uvhub_set(int pnode, struct pnmask *dstp) |
604 | { | 613 | { |
605 | __set_bit(pnode, &dstp->bits[0]); | 614 | __set_bit(pnode, &dstp->bits[0]); |
606 | } | 615 | } |
607 | static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp, | 616 | static inline void bau_uvhubs_clear(struct pnmask *dstp, |
608 | int nbits) | 617 | int nbits) |
609 | { | 618 | { |
610 | bitmap_zero(&dstp->bits[0], nbits); | 619 | bitmap_zero(&dstp->bits[0], nbits); |
611 | } | 620 | } |
612 | static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp) | 621 | static inline int bau_uvhub_weight(struct pnmask *dstp) |
613 | { | 622 | { |
614 | return bitmap_weight((unsigned long *)&dstp->bits[0], | 623 | return bitmap_weight((unsigned long *)&dstp->bits[0], |
615 | UV_DISTRIBUTION_SIZE); | 624 | UV_DISTRIBUTION_SIZE); |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 4be52c863448..10474fb1185d 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -61,1689 +61,2016 @@ | |||
61 | /* Compat: if this #define is present, UV headers support UV2 */ | 61 | /* Compat: if this #define is present, UV headers support UV2 */ |
62 | #define UV2_HUB_IS_SUPPORTED 1 | 62 | #define UV2_HUB_IS_SUPPORTED 1 |
63 | 63 | ||
64 | /* KABI compat: if this #define is present, KABI hacks are present */ | ||
65 | #define UV2_HUB_KABI_HACKS 1 | ||
66 | |||
67 | /* ========================================================================= */ | 64 | /* ========================================================================= */ |
68 | /* UVH_BAU_DATA_BROADCAST */ | 65 | /* UVH_BAU_DATA_BROADCAST */ |
69 | /* ========================================================================= */ | 66 | /* ========================================================================= */ |
70 | #define UVH_BAU_DATA_BROADCAST 0x61688UL | 67 | #define UVH_BAU_DATA_BROADCAST 0x61688UL |
71 | #define UVH_BAU_DATA_BROADCAST_32 0x440 | 68 | #define UVH_BAU_DATA_BROADCAST_32 0x440 |
72 | 69 | ||
73 | #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 | 70 | #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 |
74 | #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL | 71 | #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL |
75 | 72 | ||
76 | union uvh_bau_data_broadcast_u { | 73 | union uvh_bau_data_broadcast_u { |
77 | unsigned long v; | 74 | unsigned long v; |
78 | struct uvh_bau_data_broadcast_s { | 75 | struct uvh_bau_data_broadcast_s { |
79 | unsigned long enable : 1; /* RW */ | 76 | unsigned long enable:1; /* RW */ |
80 | unsigned long rsvd_1_63: 63; /* */ | 77 | unsigned long rsvd_1_63:63; |
81 | } s; | 78 | } s; |
82 | }; | 79 | }; |
83 | 80 | ||
84 | /* ========================================================================= */ | 81 | /* ========================================================================= */ |
85 | /* UVH_BAU_DATA_CONFIG */ | 82 | /* UVH_BAU_DATA_CONFIG */ |
86 | /* ========================================================================= */ | 83 | /* ========================================================================= */ |
87 | #define UVH_BAU_DATA_CONFIG 0x61680UL | 84 | #define UVH_BAU_DATA_CONFIG 0x61680UL |
88 | #define UVH_BAU_DATA_CONFIG_32 0x438 | 85 | #define UVH_BAU_DATA_CONFIG_32 0x438 |
89 | 86 | ||
90 | #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 | 87 | #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 |
91 | #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 88 | #define UVH_BAU_DATA_CONFIG_DM_SHFT 8 |
92 | #define UVH_BAU_DATA_CONFIG_DM_SHFT 8 | 89 | #define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 |
93 | #define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL | 90 | #define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 |
94 | #define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 | 91 | #define UVH_BAU_DATA_CONFIG_P_SHFT 13 |
95 | #define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 92 | #define UVH_BAU_DATA_CONFIG_T_SHFT 15 |
96 | #define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 | 93 | #define UVH_BAU_DATA_CONFIG_M_SHFT 16 |
97 | #define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL | 94 | #define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 |
98 | #define UVH_BAU_DATA_CONFIG_P_SHFT 13 | 95 | #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
99 | #define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL | 96 | #define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL |
100 | #define UVH_BAU_DATA_CONFIG_T_SHFT 15 | 97 | #define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
101 | #define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL | 98 | #define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL |
102 | #define UVH_BAU_DATA_CONFIG_M_SHFT 16 | 99 | #define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL |
103 | #define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL | 100 | #define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL |
104 | #define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 | 101 | #define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL |
105 | #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 102 | #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
106 | 103 | ||
107 | union uvh_bau_data_config_u { | 104 | union uvh_bau_data_config_u { |
108 | unsigned long v; | 105 | unsigned long v; |
109 | struct uvh_bau_data_config_s { | 106 | struct uvh_bau_data_config_s { |
110 | unsigned long vector_ : 8; /* RW */ | 107 | unsigned long vector_:8; /* RW */ |
111 | unsigned long dm : 3; /* RW */ | 108 | unsigned long dm:3; /* RW */ |
112 | unsigned long destmode : 1; /* RW */ | 109 | unsigned long destmode:1; /* RW */ |
113 | unsigned long status : 1; /* RO */ | 110 | unsigned long status:1; /* RO */ |
114 | unsigned long p : 1; /* RO */ | 111 | unsigned long p:1; /* RO */ |
115 | unsigned long rsvd_14 : 1; /* */ | 112 | unsigned long rsvd_14:1; |
116 | unsigned long t : 1; /* RO */ | 113 | unsigned long t:1; /* RO */ |
117 | unsigned long m : 1; /* RW */ | 114 | unsigned long m:1; /* RW */ |
118 | unsigned long rsvd_17_31: 15; /* */ | 115 | unsigned long rsvd_17_31:15; |
119 | unsigned long apic_id : 32; /* RW */ | 116 | unsigned long apic_id:32; /* RW */ |
120 | } s; | 117 | } s; |
121 | }; | 118 | }; |
122 | 119 | ||
123 | /* ========================================================================= */ | 120 | /* ========================================================================= */ |
124 | /* UVH_EVENT_OCCURRED0 */ | 121 | /* UVH_EVENT_OCCURRED0 */ |
125 | /* ========================================================================= */ | 122 | /* ========================================================================= */ |
126 | #define UVH_EVENT_OCCURRED0 0x70000UL | 123 | #define UVH_EVENT_OCCURRED0 0x70000UL |
127 | #define UVH_EVENT_OCCURRED0_32 0x5e8 | 124 | #define UVH_EVENT_OCCURRED0_32 0x5e8 |
128 | 125 | ||
129 | #define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 | 126 | #define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 |
130 | #define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL | 127 | #define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 |
131 | #define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 | 128 | #define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 |
132 | #define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL | 129 | #define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 |
133 | #define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 | 130 | #define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 |
134 | #define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL | 131 | #define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 |
135 | #define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 | 132 | #define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 |
136 | #define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL | 133 | #define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 |
137 | #define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 | 134 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 |
138 | #define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL | 135 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 |
139 | #define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 | 136 | #define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 |
140 | #define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL | 137 | #define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 |
141 | #define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 | 138 | #define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 |
142 | #define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL | 139 | #define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 |
143 | #define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 | 140 | #define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 |
144 | #define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL | 141 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 |
145 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 | 142 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 |
146 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL | 143 | #define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 |
147 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 | 144 | #define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 |
148 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL | 145 | #define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 |
149 | #define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 | 146 | #define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 |
150 | #define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL | 147 | #define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 |
151 | #define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 | 148 | #define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 |
152 | #define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL | 149 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 |
153 | #define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 | 150 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 |
154 | #define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL | 151 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 |
155 | #define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 | 152 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 |
156 | #define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL | 153 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 |
157 | #define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 | 154 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 |
158 | #define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL | 155 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 |
159 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 | 156 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 |
160 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL | 157 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 |
161 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 | 158 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 |
162 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL | 159 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 |
163 | #define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 | 160 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 |
164 | #define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL | 161 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 |
165 | #define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 | 162 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 |
166 | #define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL | 163 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 |
167 | #define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 | 164 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 |
168 | #define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL | 165 | #define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 |
169 | #define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 | 166 | #define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 |
170 | #define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL | 167 | #define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 |
171 | #define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 | 168 | #define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 |
172 | #define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL | 169 | #define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 |
173 | #define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 | 170 | #define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 |
174 | #define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL | 171 | #define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 |
175 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 | 172 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 |
176 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL | 173 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 |
177 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 | 174 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 |
178 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL | 175 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 |
179 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 | 176 | #define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 |
180 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL | 177 | #define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 |
181 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 | 178 | #define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 |
182 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL | 179 | #define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 |
183 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 | 180 | #define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 |
184 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL | 181 | #define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 |
185 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 | 182 | #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 |
186 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL | 183 | #define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL |
187 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 | 184 | #define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL |
188 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL | 185 | #define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL |
189 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 | 186 | #define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL |
190 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL | 187 | #define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL |
191 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 | 188 | #define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL |
192 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL | 189 | #define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL |
193 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 | 190 | #define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL |
194 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL | 191 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL |
195 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 | 192 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL |
196 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL | 193 | #define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL |
197 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 | 194 | #define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL |
198 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL | 195 | #define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL |
199 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 | 196 | #define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL |
200 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL | 197 | #define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL |
201 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 | 198 | #define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL |
202 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL | 199 | #define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL |
203 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 | 200 | #define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL |
204 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL | 201 | #define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL |
205 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 | 202 | #define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL |
206 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL | 203 | #define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL |
207 | #define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 | 204 | #define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL |
208 | #define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL | 205 | #define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL |
209 | #define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 | 206 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL |
210 | #define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL | 207 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL |
211 | #define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 | 208 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL |
212 | #define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL | 209 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL |
213 | #define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 | 210 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL |
214 | #define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL | 211 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL |
215 | #define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 | 212 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL |
216 | #define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL | 213 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL |
217 | #define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 | 214 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL |
218 | #define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL | 215 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL |
219 | #define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 | 216 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL |
220 | #define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL | 217 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL |
221 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 | 218 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL |
222 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL | 219 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL |
223 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 | 220 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL |
224 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL | 221 | #define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL |
225 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 | 222 | #define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL |
226 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL | 223 | #define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL |
227 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 | 224 | #define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL |
228 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL | 225 | #define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL |
229 | #define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 | 226 | #define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL |
230 | #define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL | 227 | #define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL |
231 | #define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 | 228 | #define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL |
232 | #define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL | 229 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL |
233 | #define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 | 230 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL |
234 | #define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL | 231 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL |
235 | #define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 | 232 | #define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL |
236 | #define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL | 233 | #define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL |
237 | #define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 | 234 | #define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL |
238 | #define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL | 235 | #define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL |
239 | #define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 | 236 | #define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL |
240 | #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL | 237 | #define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL |
241 | #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 | 238 | #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL |
242 | #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL | 239 | #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL |
243 | 240 | ||
244 | #define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 | 241 | #define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 |
245 | #define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL | 242 | #define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 |
246 | #define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 | 243 | #define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 |
247 | #define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL | 244 | #define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 |
248 | #define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 | 245 | #define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 |
249 | #define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL | 246 | #define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 |
250 | #define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 | 247 | #define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 |
251 | #define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL | 248 | #define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 |
252 | #define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 | 249 | #define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 |
253 | #define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL | 250 | #define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 |
254 | #define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 | 251 | #define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 |
255 | #define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL | 252 | #define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 |
256 | #define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 | 253 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 |
257 | #define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL | 254 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 |
258 | #define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 | 255 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 |
259 | #define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL | 256 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 |
260 | #define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 | 257 | #define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 |
261 | #define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL | 258 | #define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 |
262 | #define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 | 259 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 |
263 | #define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL | 260 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 |
264 | #define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 | 261 | #define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 |
265 | #define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL | 262 | #define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 |
266 | #define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 | 263 | #define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 |
267 | #define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL | 264 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 |
268 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 | 265 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 |
269 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL | 266 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 |
270 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 | 267 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 |
271 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL | 268 | #define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 |
272 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 | 269 | #define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 |
273 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL | 270 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 |
274 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 | 271 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 |
275 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL | 272 | #define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 |
276 | #define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 | 273 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 |
277 | #define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL | 274 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 |
278 | #define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 | 275 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 |
279 | #define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL | 276 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 |
280 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 | 277 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 |
281 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL | 278 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 |
282 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 | 279 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 |
283 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL | 280 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 |
284 | #define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 | 281 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 |
285 | #define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL | 282 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 |
286 | #define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 | 283 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 |
287 | #define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL | 284 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 |
288 | #define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 | 285 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 |
289 | #define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL | 286 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 |
290 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 | 287 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 |
291 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL | 288 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 |
292 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 | 289 | #define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 |
293 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL | 290 | #define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 |
294 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 | 291 | #define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 |
295 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL | 292 | #define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 |
296 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 | 293 | #define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 |
297 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL | 294 | #define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 |
298 | #define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 | 295 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 |
299 | #define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL | 296 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 |
300 | #define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 | 297 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 |
301 | #define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL | 298 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 |
302 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 | 299 | #define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 |
303 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL | 300 | #define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL |
304 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 | 301 | #define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL |
305 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL | 302 | #define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL |
306 | #define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 | 303 | #define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL |
307 | #define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL | 304 | #define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL |
308 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 | 305 | #define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL |
309 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL | 306 | #define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL |
310 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 | 307 | #define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL |
311 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL | 308 | #define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL |
312 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 | 309 | #define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL |
313 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL | 310 | #define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL |
314 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 | 311 | #define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL |
315 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL | 312 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL |
316 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 | 313 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL |
317 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL | 314 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL |
318 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 | 315 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL |
319 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL | 316 | #define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL |
320 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 | 317 | #define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL |
321 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL | 318 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL |
322 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 | 319 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL |
323 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL | 320 | #define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL |
324 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 | 321 | #define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL |
325 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL | 322 | #define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL |
326 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 | 323 | #define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL |
327 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL | 324 | #define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL |
328 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 | 325 | #define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL |
329 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL | 326 | #define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL |
330 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 | 327 | #define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL |
331 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL | 328 | #define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL |
332 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 | 329 | #define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL |
333 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL | 330 | #define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL |
334 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 | 331 | #define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL |
335 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL | 332 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL |
336 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 | 333 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL |
337 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL | 334 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL |
338 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 | 335 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL |
339 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL | 336 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL |
340 | #define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 | 337 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL |
341 | #define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL | 338 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL |
342 | #define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 | 339 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL |
343 | #define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL | 340 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL |
344 | #define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 | 341 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL |
345 | #define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL | 342 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL |
346 | #define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 | 343 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL |
347 | #define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL | 344 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL |
348 | #define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 | 345 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL |
349 | #define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL | 346 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL |
350 | #define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 | 347 | #define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL |
351 | #define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL | 348 | #define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL |
352 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 | 349 | #define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL |
353 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL | 350 | #define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL |
354 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 | 351 | #define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL |
355 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL | 352 | #define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL |
356 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 | 353 | #define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL |
357 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL | 354 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL |
358 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 | 355 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL |
359 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL | 356 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL |
360 | #define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 | 357 | #define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL |
361 | #define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL | 358 | #define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL |
362 | 359 | ||
363 | union uvh_event_occurred0_u { | 360 | union uvh_event_occurred0_u { |
364 | unsigned long v; | 361 | unsigned long v; |
365 | struct uv1h_event_occurred0_s { | 362 | struct uv1h_event_occurred0_s { |
366 | unsigned long lb_hcerr : 1; /* RW, W1C */ | 363 | unsigned long lb_hcerr:1; /* RW, W1C */ |
367 | unsigned long gr0_hcerr : 1; /* RW, W1C */ | 364 | unsigned long gr0_hcerr:1; /* RW, W1C */ |
368 | unsigned long gr1_hcerr : 1; /* RW, W1C */ | 365 | unsigned long gr1_hcerr:1; /* RW, W1C */ |
369 | unsigned long lh_hcerr : 1; /* RW, W1C */ | 366 | unsigned long lh_hcerr:1; /* RW, W1C */ |
370 | unsigned long rh_hcerr : 1; /* RW, W1C */ | 367 | unsigned long rh_hcerr:1; /* RW, W1C */ |
371 | unsigned long xn_hcerr : 1; /* RW, W1C */ | 368 | unsigned long xn_hcerr:1; /* RW, W1C */ |
372 | unsigned long si_hcerr : 1; /* RW, W1C */ | 369 | unsigned long si_hcerr:1; /* RW, W1C */ |
373 | unsigned long lb_aoerr0 : 1; /* RW, W1C */ | 370 | unsigned long lb_aoerr0:1; /* RW, W1C */ |
374 | unsigned long gr0_aoerr0 : 1; /* RW, W1C */ | 371 | unsigned long gr0_aoerr0:1; /* RW, W1C */ |
375 | unsigned long gr1_aoerr0 : 1; /* RW, W1C */ | 372 | unsigned long gr1_aoerr0:1; /* RW, W1C */ |
376 | unsigned long lh_aoerr0 : 1; /* RW, W1C */ | 373 | unsigned long lh_aoerr0:1; /* RW, W1C */ |
377 | unsigned long rh_aoerr0 : 1; /* RW, W1C */ | 374 | unsigned long rh_aoerr0:1; /* RW, W1C */ |
378 | unsigned long xn_aoerr0 : 1; /* RW, W1C */ | 375 | unsigned long xn_aoerr0:1; /* RW, W1C */ |
379 | unsigned long si_aoerr0 : 1; /* RW, W1C */ | 376 | unsigned long si_aoerr0:1; /* RW, W1C */ |
380 | unsigned long lb_aoerr1 : 1; /* RW, W1C */ | 377 | unsigned long lb_aoerr1:1; /* RW, W1C */ |
381 | unsigned long gr0_aoerr1 : 1; /* RW, W1C */ | 378 | unsigned long gr0_aoerr1:1; /* RW, W1C */ |
382 | unsigned long gr1_aoerr1 : 1; /* RW, W1C */ | 379 | unsigned long gr1_aoerr1:1; /* RW, W1C */ |
383 | unsigned long lh_aoerr1 : 1; /* RW, W1C */ | 380 | unsigned long lh_aoerr1:1; /* RW, W1C */ |
384 | unsigned long rh_aoerr1 : 1; /* RW, W1C */ | 381 | unsigned long rh_aoerr1:1; /* RW, W1C */ |
385 | unsigned long xn_aoerr1 : 1; /* RW, W1C */ | 382 | unsigned long xn_aoerr1:1; /* RW, W1C */ |
386 | unsigned long si_aoerr1 : 1; /* RW, W1C */ | 383 | unsigned long si_aoerr1:1; /* RW, W1C */ |
387 | unsigned long rh_vpi_int : 1; /* RW, W1C */ | 384 | unsigned long rh_vpi_int:1; /* RW, W1C */ |
388 | unsigned long system_shutdown_int : 1; /* RW, W1C */ | 385 | unsigned long system_shutdown_int:1; /* RW, W1C */ |
389 | unsigned long lb_irq_int_0 : 1; /* RW, W1C */ | 386 | unsigned long lb_irq_int_0:1; /* RW, W1C */ |
390 | unsigned long lb_irq_int_1 : 1; /* RW, W1C */ | 387 | unsigned long lb_irq_int_1:1; /* RW, W1C */ |
391 | unsigned long lb_irq_int_2 : 1; /* RW, W1C */ | 388 | unsigned long lb_irq_int_2:1; /* RW, W1C */ |
392 | unsigned long lb_irq_int_3 : 1; /* RW, W1C */ | 389 | unsigned long lb_irq_int_3:1; /* RW, W1C */ |
393 | unsigned long lb_irq_int_4 : 1; /* RW, W1C */ | 390 | unsigned long lb_irq_int_4:1; /* RW, W1C */ |
394 | unsigned long lb_irq_int_5 : 1; /* RW, W1C */ | 391 | unsigned long lb_irq_int_5:1; /* RW, W1C */ |
395 | unsigned long lb_irq_int_6 : 1; /* RW, W1C */ | 392 | unsigned long lb_irq_int_6:1; /* RW, W1C */ |
396 | unsigned long lb_irq_int_7 : 1; /* RW, W1C */ | 393 | unsigned long lb_irq_int_7:1; /* RW, W1C */ |
397 | unsigned long lb_irq_int_8 : 1; /* RW, W1C */ | 394 | unsigned long lb_irq_int_8:1; /* RW, W1C */ |
398 | unsigned long lb_irq_int_9 : 1; /* RW, W1C */ | 395 | unsigned long lb_irq_int_9:1; /* RW, W1C */ |
399 | unsigned long lb_irq_int_10 : 1; /* RW, W1C */ | 396 | unsigned long lb_irq_int_10:1; /* RW, W1C */ |
400 | unsigned long lb_irq_int_11 : 1; /* RW, W1C */ | 397 | unsigned long lb_irq_int_11:1; /* RW, W1C */ |
401 | unsigned long lb_irq_int_12 : 1; /* RW, W1C */ | 398 | unsigned long lb_irq_int_12:1; /* RW, W1C */ |
402 | unsigned long lb_irq_int_13 : 1; /* RW, W1C */ | 399 | unsigned long lb_irq_int_13:1; /* RW, W1C */ |
403 | unsigned long lb_irq_int_14 : 1; /* RW, W1C */ | 400 | unsigned long lb_irq_int_14:1; /* RW, W1C */ |
404 | unsigned long lb_irq_int_15 : 1; /* RW, W1C */ | 401 | unsigned long lb_irq_int_15:1; /* RW, W1C */ |
405 | unsigned long l1_nmi_int : 1; /* RW, W1C */ | 402 | unsigned long l1_nmi_int:1; /* RW, W1C */ |
406 | unsigned long stop_clock : 1; /* RW, W1C */ | 403 | unsigned long stop_clock:1; /* RW, W1C */ |
407 | unsigned long asic_to_l1 : 1; /* RW, W1C */ | 404 | unsigned long asic_to_l1:1; /* RW, W1C */ |
408 | unsigned long l1_to_asic : 1; /* RW, W1C */ | 405 | unsigned long l1_to_asic:1; /* RW, W1C */ |
409 | unsigned long ltc_int : 1; /* RW, W1C */ | 406 | unsigned long ltc_int:1; /* RW, W1C */ |
410 | unsigned long la_seq_trigger : 1; /* RW, W1C */ | 407 | unsigned long la_seq_trigger:1; /* RW, W1C */ |
411 | unsigned long ipi_int : 1; /* RW, W1C */ | 408 | unsigned long ipi_int:1; /* RW, W1C */ |
412 | unsigned long extio_int0 : 1; /* RW, W1C */ | 409 | unsigned long extio_int0:1; /* RW, W1C */ |
413 | unsigned long extio_int1 : 1; /* RW, W1C */ | 410 | unsigned long extio_int1:1; /* RW, W1C */ |
414 | unsigned long extio_int2 : 1; /* RW, W1C */ | 411 | unsigned long extio_int2:1; /* RW, W1C */ |
415 | unsigned long extio_int3 : 1; /* RW, W1C */ | 412 | unsigned long extio_int3:1; /* RW, W1C */ |
416 | unsigned long profile_int : 1; /* RW, W1C */ | 413 | unsigned long profile_int:1; /* RW, W1C */ |
417 | unsigned long rtc0 : 1; /* RW, W1C */ | 414 | unsigned long rtc0:1; /* RW, W1C */ |
418 | unsigned long rtc1 : 1; /* RW, W1C */ | 415 | unsigned long rtc1:1; /* RW, W1C */ |
419 | unsigned long rtc2 : 1; /* RW, W1C */ | 416 | unsigned long rtc2:1; /* RW, W1C */ |
420 | unsigned long rtc3 : 1; /* RW, W1C */ | 417 | unsigned long rtc3:1; /* RW, W1C */ |
421 | unsigned long bau_data : 1; /* RW, W1C */ | 418 | unsigned long bau_data:1; /* RW, W1C */ |
422 | unsigned long power_management_req : 1; /* RW, W1C */ | 419 | unsigned long power_management_req:1; /* RW, W1C */ |
423 | unsigned long rsvd_57_63 : 7; /* */ | 420 | unsigned long rsvd_57_63:7; |
424 | } s1; | 421 | } s1; |
425 | struct uv2h_event_occurred0_s { | 422 | struct uv2h_event_occurred0_s { |
426 | unsigned long lb_hcerr : 1; /* RW */ | 423 | unsigned long lb_hcerr:1; /* RW */ |
427 | unsigned long qp_hcerr : 1; /* RW */ | 424 | unsigned long qp_hcerr:1; /* RW */ |
428 | unsigned long rh_hcerr : 1; /* RW */ | 425 | unsigned long rh_hcerr:1; /* RW */ |
429 | unsigned long lh0_hcerr : 1; /* RW */ | 426 | unsigned long lh0_hcerr:1; /* RW */ |
430 | unsigned long lh1_hcerr : 1; /* RW */ | 427 | unsigned long lh1_hcerr:1; /* RW */ |
431 | unsigned long gr0_hcerr : 1; /* RW */ | 428 | unsigned long gr0_hcerr:1; /* RW */ |
432 | unsigned long gr1_hcerr : 1; /* RW */ | 429 | unsigned long gr1_hcerr:1; /* RW */ |
433 | unsigned long ni0_hcerr : 1; /* RW */ | 430 | unsigned long ni0_hcerr:1; /* RW */ |
434 | unsigned long ni1_hcerr : 1; /* RW */ | 431 | unsigned long ni1_hcerr:1; /* RW */ |
435 | unsigned long lb_aoerr0 : 1; /* RW */ | 432 | unsigned long lb_aoerr0:1; /* RW */ |
436 | unsigned long qp_aoerr0 : 1; /* RW */ | 433 | unsigned long qp_aoerr0:1; /* RW */ |
437 | unsigned long rh_aoerr0 : 1; /* RW */ | 434 | unsigned long rh_aoerr0:1; /* RW */ |
438 | unsigned long lh0_aoerr0 : 1; /* RW */ | 435 | unsigned long lh0_aoerr0:1; /* RW */ |
439 | unsigned long lh1_aoerr0 : 1; /* RW */ | 436 | unsigned long lh1_aoerr0:1; /* RW */ |
440 | unsigned long gr0_aoerr0 : 1; /* RW */ | 437 | unsigned long gr0_aoerr0:1; /* RW */ |
441 | unsigned long gr1_aoerr0 : 1; /* RW */ | 438 | unsigned long gr1_aoerr0:1; /* RW */ |
442 | unsigned long xb_aoerr0 : 1; /* RW */ | 439 | unsigned long xb_aoerr0:1; /* RW */ |
443 | unsigned long rt_aoerr0 : 1; /* RW */ | 440 | unsigned long rt_aoerr0:1; /* RW */ |
444 | unsigned long ni0_aoerr0 : 1; /* RW */ | 441 | unsigned long ni0_aoerr0:1; /* RW */ |
445 | unsigned long ni1_aoerr0 : 1; /* RW */ | 442 | unsigned long ni1_aoerr0:1; /* RW */ |
446 | unsigned long lb_aoerr1 : 1; /* RW */ | 443 | unsigned long lb_aoerr1:1; /* RW */ |
447 | unsigned long qp_aoerr1 : 1; /* RW */ | 444 | unsigned long qp_aoerr1:1; /* RW */ |
448 | unsigned long rh_aoerr1 : 1; /* RW */ | 445 | unsigned long rh_aoerr1:1; /* RW */ |
449 | unsigned long lh0_aoerr1 : 1; /* RW */ | 446 | unsigned long lh0_aoerr1:1; /* RW */ |
450 | unsigned long lh1_aoerr1 : 1; /* RW */ | 447 | unsigned long lh1_aoerr1:1; /* RW */ |
451 | unsigned long gr0_aoerr1 : 1; /* RW */ | 448 | unsigned long gr0_aoerr1:1; /* RW */ |
452 | unsigned long gr1_aoerr1 : 1; /* RW */ | 449 | unsigned long gr1_aoerr1:1; /* RW */ |
453 | unsigned long xb_aoerr1 : 1; /* RW */ | 450 | unsigned long xb_aoerr1:1; /* RW */ |
454 | unsigned long rt_aoerr1 : 1; /* RW */ | 451 | unsigned long rt_aoerr1:1; /* RW */ |
455 | unsigned long ni0_aoerr1 : 1; /* RW */ | 452 | unsigned long ni0_aoerr1:1; /* RW */ |
456 | unsigned long ni1_aoerr1 : 1; /* RW */ | 453 | unsigned long ni1_aoerr1:1; /* RW */ |
457 | unsigned long system_shutdown_int : 1; /* RW */ | 454 | unsigned long system_shutdown_int:1; /* RW */ |
458 | unsigned long lb_irq_int_0 : 1; /* RW */ | 455 | unsigned long lb_irq_int_0:1; /* RW */ |
459 | unsigned long lb_irq_int_1 : 1; /* RW */ | 456 | unsigned long lb_irq_int_1:1; /* RW */ |
460 | unsigned long lb_irq_int_2 : 1; /* RW */ | 457 | unsigned long lb_irq_int_2:1; /* RW */ |
461 | unsigned long lb_irq_int_3 : 1; /* RW */ | 458 | unsigned long lb_irq_int_3:1; /* RW */ |
462 | unsigned long lb_irq_int_4 : 1; /* RW */ | 459 | unsigned long lb_irq_int_4:1; /* RW */ |
463 | unsigned long lb_irq_int_5 : 1; /* RW */ | 460 | unsigned long lb_irq_int_5:1; /* RW */ |
464 | unsigned long lb_irq_int_6 : 1; /* RW */ | 461 | unsigned long lb_irq_int_6:1; /* RW */ |
465 | unsigned long lb_irq_int_7 : 1; /* RW */ | 462 | unsigned long lb_irq_int_7:1; /* RW */ |
466 | unsigned long lb_irq_int_8 : 1; /* RW */ | 463 | unsigned long lb_irq_int_8:1; /* RW */ |
467 | unsigned long lb_irq_int_9 : 1; /* RW */ | 464 | unsigned long lb_irq_int_9:1; /* RW */ |
468 | unsigned long lb_irq_int_10 : 1; /* RW */ | 465 | unsigned long lb_irq_int_10:1; /* RW */ |
469 | unsigned long lb_irq_int_11 : 1; /* RW */ | 466 | unsigned long lb_irq_int_11:1; /* RW */ |
470 | unsigned long lb_irq_int_12 : 1; /* RW */ | 467 | unsigned long lb_irq_int_12:1; /* RW */ |
471 | unsigned long lb_irq_int_13 : 1; /* RW */ | 468 | unsigned long lb_irq_int_13:1; /* RW */ |
472 | unsigned long lb_irq_int_14 : 1; /* RW */ | 469 | unsigned long lb_irq_int_14:1; /* RW */ |
473 | unsigned long lb_irq_int_15 : 1; /* RW */ | 470 | unsigned long lb_irq_int_15:1; /* RW */ |
474 | unsigned long l1_nmi_int : 1; /* RW */ | 471 | unsigned long l1_nmi_int:1; /* RW */ |
475 | unsigned long stop_clock : 1; /* RW */ | 472 | unsigned long stop_clock:1; /* RW */ |
476 | unsigned long asic_to_l1 : 1; /* RW */ | 473 | unsigned long asic_to_l1:1; /* RW */ |
477 | unsigned long l1_to_asic : 1; /* RW */ | 474 | unsigned long l1_to_asic:1; /* RW */ |
478 | unsigned long la_seq_trigger : 1; /* RW */ | 475 | unsigned long la_seq_trigger:1; /* RW */ |
479 | unsigned long ipi_int : 1; /* RW */ | 476 | unsigned long ipi_int:1; /* RW */ |
480 | unsigned long extio_int0 : 1; /* RW */ | 477 | unsigned long extio_int0:1; /* RW */ |
481 | unsigned long extio_int1 : 1; /* RW */ | 478 | unsigned long extio_int1:1; /* RW */ |
482 | unsigned long extio_int2 : 1; /* RW */ | 479 | unsigned long extio_int2:1; /* RW */ |
483 | unsigned long extio_int3 : 1; /* RW */ | 480 | unsigned long extio_int3:1; /* RW */ |
484 | unsigned long profile_int : 1; /* RW */ | 481 | unsigned long profile_int:1; /* RW */ |
485 | unsigned long rsvd_59_63 : 5; /* */ | 482 | unsigned long rsvd_59_63:5; |
486 | } s2; | 483 | } s2; |
487 | }; | 484 | }; |
488 | 485 | ||
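The hunk above only reflows the bitfield declarations; both sides keep the same dual view of the 64-bit EVENT_OCCURRED0 register, a raw word `v` overlaid with per-revision bitfield structs (`s1` for UV1, `s2` for UV2) next to the *_SHFT/*_MASK macros. As a minimal sketch of how such a layout is typically consumed, the helper names and the `is_uv1` flag below are illustrative stand-ins and not part of this patch:

	/* Illustration only: decode one EVENT_OCCURRED0 value both ways. */
	static int lb_irq0_pending(unsigned long mmr, int is_uv1)
	{
		union uvh_event_occurred0_u e = { .v = mmr };

		/* Bitfield view; the field exists in both layouts. */
		return is_uv1 ? e.s1.lb_irq_int_0 : e.s2.lb_irq_int_0;
	}

	/* Equivalent mask test using the UV2 macro defined above. */
	static int lb_irq0_pending_uv2(unsigned long mmr)
	{
		return (mmr & UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK) != 0;
	}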
489 | /* ========================================================================= */ | 486 | /* ========================================================================= */ |
490 | /* UVH_EVENT_OCCURRED0_ALIAS */ | 487 | /* UVH_EVENT_OCCURRED0_ALIAS */ |
491 | /* ========================================================================= */ | 488 | /* ========================================================================= */ |
492 | #define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL | 489 | #define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL |
493 | #define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 | 490 | #define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 |
494 | 491 | ||
495 | /* ========================================================================= */ | 492 | /* ========================================================================= */ |
496 | /* UVH_GR0_TLB_INT0_CONFIG */ | 493 | /* UVH_GR0_TLB_INT0_CONFIG */ |
497 | /* ========================================================================= */ | 494 | /* ========================================================================= */ |
498 | #define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL | 495 | #define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL |
499 | 496 | ||
500 | #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 | 497 | #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 |
501 | #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 498 | #define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 |
502 | #define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 | 499 | #define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 |
503 | #define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL | 500 | #define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 |
504 | #define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 | 501 | #define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13 |
505 | #define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 502 | #define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15 |
506 | #define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 | 503 | #define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16 |
507 | #define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL | 504 | #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 |
508 | #define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13 | 505 | #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
509 | #define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL | 506 | #define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL |
510 | #define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15 | 507 | #define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
511 | #define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL | 508 | #define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL |
512 | #define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16 | 509 | #define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL |
513 | #define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL | 510 | #define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL |
514 | #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 | 511 | #define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL |
515 | #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 512 | #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
516 | 513 | ||
517 | union uvh_gr0_tlb_int0_config_u { | 514 | union uvh_gr0_tlb_int0_config_u { |
518 | unsigned long v; | 515 | unsigned long v; |
519 | struct uvh_gr0_tlb_int0_config_s { | 516 | struct uvh_gr0_tlb_int0_config_s { |
520 | unsigned long vector_ : 8; /* RW */ | 517 | unsigned long vector_:8; /* RW */ |
521 | unsigned long dm : 3; /* RW */ | 518 | unsigned long dm:3; /* RW */ |
522 | unsigned long destmode : 1; /* RW */ | 519 | unsigned long destmode:1; /* RW */ |
523 | unsigned long status : 1; /* RO */ | 520 | unsigned long status:1; /* RO */ |
524 | unsigned long p : 1; /* RO */ | 521 | unsigned long p:1; /* RO */ |
525 | unsigned long rsvd_14 : 1; /* */ | 522 | unsigned long rsvd_14:1; |
526 | unsigned long t : 1; /* RO */ | 523 | unsigned long t:1; /* RO */ |
527 | unsigned long m : 1; /* RW */ | 524 | unsigned long m:1; /* RW */ |
528 | unsigned long rsvd_17_31: 15; /* */ | 525 | unsigned long rsvd_17_31:15; |
529 | unsigned long apic_id : 32; /* RW */ | 526 | unsigned long apic_id:32; /* RW */ |
530 | } s; | 527 | } s; |
531 | }; | 528 | }; |
532 | 529 | ||
533 | /* ========================================================================= */ | 530 | /* ========================================================================= */ |
534 | /* UVH_GR0_TLB_INT1_CONFIG */ | 531 | /* UVH_GR0_TLB_INT1_CONFIG */ |
535 | /* ========================================================================= */ | 532 | /* ========================================================================= */ |
536 | #define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL | 533 | #define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL |
537 | 534 | ||
538 | #define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 | 535 | #define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 |
539 | #define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 536 | #define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 |
540 | #define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 | 537 | #define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 |
541 | #define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL | 538 | #define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 |
542 | #define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 | 539 | #define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13 |
543 | #define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 540 | #define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15 |
544 | #define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 | 541 | #define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16 |
545 | #define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL | 542 | #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 |
546 | #define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13 | 543 | #define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
547 | #define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL | 544 | #define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL |
548 | #define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15 | 545 | #define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
549 | #define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL | 546 | #define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL |
550 | #define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16 | 547 | #define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL |
551 | #define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL | 548 | #define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL |
552 | #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 | 549 | #define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL |
553 | #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 550 | #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
554 | 551 | ||
555 | union uvh_gr0_tlb_int1_config_u { | 552 | union uvh_gr0_tlb_int1_config_u { |
556 | unsigned long v; | 553 | unsigned long v; |
557 | struct uvh_gr0_tlb_int1_config_s { | 554 | struct uvh_gr0_tlb_int1_config_s { |
558 | unsigned long vector_ : 8; /* RW */ | 555 | unsigned long vector_:8; /* RW */ |
559 | unsigned long dm : 3; /* RW */ | 556 | unsigned long dm:3; /* RW */ |
560 | unsigned long destmode : 1; /* RW */ | 557 | unsigned long destmode:1; /* RW */ |
561 | unsigned long status : 1; /* RO */ | 558 | unsigned long status:1; /* RO */ |
562 | unsigned long p : 1; /* RO */ | 559 | unsigned long p:1; /* RO */ |
563 | unsigned long rsvd_14 : 1; /* */ | 560 | unsigned long rsvd_14:1; |
564 | unsigned long t : 1; /* RO */ | 561 | unsigned long t:1; /* RO */ |
565 | unsigned long m : 1; /* RW */ | 562 | unsigned long m:1; /* RW */ |
566 | unsigned long rsvd_17_31: 15; /* */ | 563 | unsigned long rsvd_17_31:15; |
567 | unsigned long apic_id : 32; /* RW */ | 564 | unsigned long apic_id:32; /* RW */ |
568 | } s; | 565 | } s; |
566 | }; | ||
567 | |||
568 | /* ========================================================================= */ | ||
569 | /* UVH_GR0_TLB_MMR_CONTROL */ | ||
570 | /* ========================================================================= */ | ||
571 | #define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL | ||
572 | #define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL | ||
573 | #define UVH_GR0_TLB_MMR_CONTROL (is_uv1_hub() ? \ | ||
574 | UV1H_GR0_TLB_MMR_CONTROL : \ | ||
575 | UV2H_GR0_TLB_MMR_CONTROL) | ||
576 | |||
577 | #define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
578 | #define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
579 | #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
580 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
581 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
582 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
583 | #define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
584 | #define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
585 | #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
586 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
587 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
588 | #define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
589 | |||
590 | #define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
591 | #define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
592 | #define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
593 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
594 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
595 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
596 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 | ||
597 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 | ||
598 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 | ||
599 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 | ||
600 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 | ||
601 | #define UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
602 | #define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
603 | #define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
604 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
605 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
606 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
607 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL | ||
608 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL | ||
609 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL | ||
610 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL | ||
611 | #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL | ||
612 | |||
613 | #define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
614 | #define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
615 | #define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
616 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
617 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
618 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
619 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 | ||
620 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 | ||
621 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 | ||
622 | #define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
623 | #define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
624 | #define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
625 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
626 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
627 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
628 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL | ||
629 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL | ||
630 | #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL | ||
631 | |||
632 | union uvh_gr0_tlb_mmr_control_u { | ||
633 | unsigned long v; | ||
634 | struct uvh_gr0_tlb_mmr_control_s { | ||
635 | unsigned long index:12; /* RW */ | ||
636 | unsigned long mem_sel:2; /* RW */ | ||
637 | unsigned long rsvd_14_15:2; | ||
638 | unsigned long auto_valid_en:1; /* RW */ | ||
639 | unsigned long rsvd_17_19:3; | ||
640 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
641 | unsigned long rsvd_21_29:9; | ||
642 | unsigned long mmr_write:1; /* WP */ | ||
643 | unsigned long mmr_read:1; /* WP */ | ||
644 | unsigned long rsvd_32_63:32; | ||
645 | } s; | ||
646 | struct uv1h_gr0_tlb_mmr_control_s { | ||
647 | unsigned long index:12; /* RW */ | ||
648 | unsigned long mem_sel:2; /* RW */ | ||
649 | unsigned long rsvd_14_15:2; | ||
650 | unsigned long auto_valid_en:1; /* RW */ | ||
651 | unsigned long rsvd_17_19:3; | ||
652 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
653 | unsigned long rsvd_21_29:9; | ||
654 | unsigned long mmr_write:1; /* WP */ | ||
655 | unsigned long mmr_read:1; /* WP */ | ||
656 | unsigned long rsvd_32_47:16; | ||
657 | unsigned long mmr_inj_con:1; /* RW */ | ||
658 | unsigned long rsvd_49_51:3; | ||
659 | unsigned long mmr_inj_tlbram:1; /* RW */ | ||
660 | unsigned long rsvd_53:1; | ||
661 | unsigned long mmr_inj_tlbpgsize:1; /* RW */ | ||
662 | unsigned long rsvd_55:1; | ||
663 | unsigned long mmr_inj_tlbrreg:1; /* RW */ | ||
664 | unsigned long rsvd_57_59:3; | ||
665 | unsigned long mmr_inj_tlblruv:1; /* RW */ | ||
666 | unsigned long rsvd_61_63:3; | ||
667 | } s1; | ||
668 | struct uv2h_gr0_tlb_mmr_control_s { | ||
669 | unsigned long index:12; /* RW */ | ||
670 | unsigned long mem_sel:2; /* RW */ | ||
671 | unsigned long rsvd_14_15:2; | ||
672 | unsigned long auto_valid_en:1; /* RW */ | ||
673 | unsigned long rsvd_17_19:3; | ||
674 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
675 | unsigned long rsvd_21_29:9; | ||
676 | unsigned long mmr_write:1; /* WP */ | ||
677 | unsigned long mmr_read:1; /* WP */ | ||
678 | unsigned long mmr_op_done:1; /* RW */ | ||
679 | unsigned long rsvd_33_47:15; | ||
680 | unsigned long mmr_inj_con:1; /* RW */ | ||
681 | unsigned long rsvd_49_51:3; | ||
682 | unsigned long mmr_inj_tlbram:1; /* RW */ | ||
683 | unsigned long rsvd_53_63:11; | ||
684 | } s2; | ||
685 | }; | ||
686 | |||
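The block added on the right introduces the GR0 TLB MMR control register, with the UV1/UV2 base address picked at run time by the UVH_GR0_TLB_MMR_CONTROL wrapper. A hedged sketch of composing a read request from the union follows; the function name is illustrative, and the actual MMR write plus the completion handshake (e.g. the UV2-only mmr_op_done bit) are not shown:

	/* Illustration only: build a CONTROL word requesting a TLB read. */
	static unsigned long gr0_tlb_read_request(unsigned int index,
						  unsigned int mem_sel)
	{
		union uvh_gr0_tlb_mmr_control_u ctl = { .v = 0 };

		ctl.s.index    = index;    /* TLB entry to access */
		ctl.s.mem_sel  = mem_sel;  /* which TLB RAM bank */
		ctl.s.mmr_read = 1;        /* trigger the read ("WP" bit) */
		return ctl.v;
	}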
687 | /* ========================================================================= */ | ||
688 | /* UVH_GR0_TLB_MMR_READ_DATA_HI */ | ||
689 | /* ========================================================================= */ | ||
690 | #define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL | ||
691 | #define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL | ||
692 | #define UVH_GR0_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ | ||
693 | UV1H_GR0_TLB_MMR_READ_DATA_HI : \ | ||
694 | UV2H_GR0_TLB_MMR_READ_DATA_HI) | ||
695 | |||
696 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 | ||
697 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 | ||
698 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 | ||
699 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 | ||
700 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL | ||
701 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL | ||
702 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL | ||
703 | #define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL | ||
704 | |||
705 | union uvh_gr0_tlb_mmr_read_data_hi_u { | ||
706 | unsigned long v; | ||
707 | struct uvh_gr0_tlb_mmr_read_data_hi_s { | ||
708 | unsigned long pfn:41; /* RO */ | ||
709 | unsigned long gaa:2; /* RO */ | ||
710 | unsigned long dirty:1; /* RO */ | ||
711 | unsigned long larger:1; /* RO */ | ||
712 | unsigned long rsvd_45_63:19; | ||
713 | } s; | ||
714 | }; | ||
715 | |||
716 | /* ========================================================================= */ | ||
717 | /* UVH_GR0_TLB_MMR_READ_DATA_LO */ | ||
718 | /* ========================================================================= */ | ||
719 | #define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL | ||
720 | #define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL | ||
721 | #define UVH_GR0_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ | ||
722 | UV1H_GR0_TLB_MMR_READ_DATA_LO : \ | ||
723 | UV2H_GR0_TLB_MMR_READ_DATA_LO) | ||
724 | |||
725 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 | ||
726 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 | ||
727 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 | ||
728 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL | ||
729 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL | ||
730 | #define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL | ||
731 | |||
732 | union uvh_gr0_tlb_mmr_read_data_lo_u { | ||
733 | unsigned long v; | ||
734 | struct uvh_gr0_tlb_mmr_read_data_lo_s { | ||
735 | unsigned long vpn:39; /* RO */ | ||
736 | unsigned long asid:24; /* RO */ | ||
737 | unsigned long valid:1; /* RO */ | ||
738 | } s; | ||
569 | }; | 739 | }; |
570 | 740 | ||
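Once a read has completed, the DATA_HI/DATA_LO registers added above carry the entry contents. A small decode-only sketch using the new unions (struct and function names are illustrative, and no MMR access is performed here):

	struct gr0_tlb_entry {		/* illustrative container */
		unsigned long pfn, vpn;
		unsigned int gaa, dirty, larger, asid, valid;
	};

	static void decode_gr0_tlb_entry(unsigned long hi, unsigned long lo,
					 struct gr0_tlb_entry *out)
	{
		union uvh_gr0_tlb_mmr_read_data_hi_u h = { .v = hi };
		union uvh_gr0_tlb_mmr_read_data_lo_u l = { .v = lo };

		out->pfn    = h.s.pfn;
		out->gaa    = h.s.gaa;
		out->dirty  = h.s.dirty;
		out->larger = h.s.larger;
		out->vpn    = l.s.vpn;
		out->asid   = l.s.asid;
		out->valid  = l.s.valid;
	}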
571 | /* ========================================================================= */ | 741 | /* ========================================================================= */ |
572 | /* UVH_GR1_TLB_INT0_CONFIG */ | 742 | /* UVH_GR1_TLB_INT0_CONFIG */ |
573 | /* ========================================================================= */ | 743 | /* ========================================================================= */ |
574 | #define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL | 744 | #define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL |
575 | 745 | ||
576 | #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 | 746 | #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 |
577 | #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 747 | #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 |
578 | #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 | 748 | #define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 |
579 | #define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL | 749 | #define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 |
580 | #define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 | 750 | #define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 |
581 | #define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 751 | #define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 |
582 | #define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 | 752 | #define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 |
583 | #define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL | 753 | #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 |
584 | #define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 | 754 | #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
585 | #define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL | 755 | #define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL |
586 | #define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 | 756 | #define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
587 | #define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL | 757 | #define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL |
588 | #define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 | 758 | #define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL |
589 | #define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL | 759 | #define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL |
590 | #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 | 760 | #define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL |
591 | #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 761 | #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
592 | 762 | ||
593 | union uvh_gr1_tlb_int0_config_u { | 763 | union uvh_gr1_tlb_int0_config_u { |
594 | unsigned long v; | 764 | unsigned long v; |
595 | struct uvh_gr1_tlb_int0_config_s { | 765 | struct uvh_gr1_tlb_int0_config_s { |
596 | unsigned long vector_ : 8; /* RW */ | 766 | unsigned long vector_:8; /* RW */ |
597 | unsigned long dm : 3; /* RW */ | 767 | unsigned long dm:3; /* RW */ |
598 | unsigned long destmode : 1; /* RW */ | 768 | unsigned long destmode:1; /* RW */ |
599 | unsigned long status : 1; /* RO */ | 769 | unsigned long status:1; /* RO */ |
600 | unsigned long p : 1; /* RO */ | 770 | unsigned long p:1; /* RO */ |
601 | unsigned long rsvd_14 : 1; /* */ | 771 | unsigned long rsvd_14:1; |
602 | unsigned long t : 1; /* RO */ | 772 | unsigned long t:1; /* RO */ |
603 | unsigned long m : 1; /* RW */ | 773 | unsigned long m:1; /* RW */ |
604 | unsigned long rsvd_17_31: 15; /* */ | 774 | unsigned long rsvd_17_31:15; |
605 | unsigned long apic_id : 32; /* RW */ | 775 | unsigned long apic_id:32; /* RW */ |
606 | } s; | 776 | } s; |
607 | }; | 777 | }; |
608 | 778 | ||
609 | /* ========================================================================= */ | 779 | /* ========================================================================= */ |
610 | /* UVH_GR1_TLB_INT1_CONFIG */ | 780 | /* UVH_GR1_TLB_INT1_CONFIG */ |
611 | /* ========================================================================= */ | 781 | /* ========================================================================= */ |
612 | #define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL | 782 | #define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL |
613 | 783 | ||
614 | #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 | 784 | #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 |
615 | #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 785 | #define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 |
616 | #define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 | 786 | #define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 |
617 | #define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL | 787 | #define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 |
618 | #define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 | 788 | #define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 |
619 | #define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 789 | #define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 |
620 | #define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 | 790 | #define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 |
621 | #define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL | 791 | #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 |
622 | #define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 | 792 | #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
623 | #define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL | 793 | #define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL |
624 | #define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 | 794 | #define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
625 | #define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL | 795 | #define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL |
626 | #define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 | 796 | #define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL |
627 | #define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL | 797 | #define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL |
628 | #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 | 798 | #define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL |
629 | #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 799 | #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
630 | 800 | ||
631 | union uvh_gr1_tlb_int1_config_u { | 801 | union uvh_gr1_tlb_int1_config_u { |
632 | unsigned long v; | 802 | unsigned long v; |
633 | struct uvh_gr1_tlb_int1_config_s { | 803 | struct uvh_gr1_tlb_int1_config_s { |
634 | unsigned long vector_ : 8; /* RW */ | 804 | unsigned long vector_:8; /* RW */ |
635 | unsigned long dm : 3; /* RW */ | 805 | unsigned long dm:3; /* RW */ |
636 | unsigned long destmode : 1; /* RW */ | 806 | unsigned long destmode:1; /* RW */ |
637 | unsigned long status : 1; /* RO */ | 807 | unsigned long status:1; /* RO */ |
638 | unsigned long p : 1; /* RO */ | 808 | unsigned long p:1; /* RO */ |
639 | unsigned long rsvd_14 : 1; /* */ | 809 | unsigned long rsvd_14:1; |
640 | unsigned long t : 1; /* RO */ | 810 | unsigned long t:1; /* RO */ |
641 | unsigned long m : 1; /* RW */ | 811 | unsigned long m:1; /* RW */ |
642 | unsigned long rsvd_17_31: 15; /* */ | 812 | unsigned long rsvd_17_31:15; |
643 | unsigned long apic_id : 32; /* RW */ | 813 | unsigned long apic_id:32; /* RW */ |
644 | } s; | 814 | } s; |
815 | }; | ||
816 | |||
817 | /* ========================================================================= */ | ||
818 | /* UVH_GR1_TLB_MMR_CONTROL */ | ||
819 | /* ========================================================================= */ | ||
820 | #define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL | ||
821 | #define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL | ||
822 | #define UVH_GR1_TLB_MMR_CONTROL (is_uv1_hub() ? \ | ||
823 | UV1H_GR1_TLB_MMR_CONTROL : \ | ||
824 | UV2H_GR1_TLB_MMR_CONTROL) | ||
825 | |||
826 | #define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
827 | #define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
828 | #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
829 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
830 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
831 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
832 | #define UVH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
833 | #define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
834 | #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
835 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
836 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
837 | #define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
838 | |||
839 | #define UV1H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
840 | #define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
841 | #define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
842 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
843 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
844 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
845 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 | ||
846 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 | ||
847 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_SHFT 54 | ||
848 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_SHFT 56 | ||
849 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_SHFT 60 | ||
850 | #define UV1H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
851 | #define UV1H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
852 | #define UV1H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
853 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
854 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
855 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
856 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL | ||
857 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL | ||
858 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBPGSIZE_MASK 0x0040000000000000UL | ||
859 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL | ||
860 | #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL | ||
861 | |||
862 | #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 | ||
863 | #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 | ||
864 | #define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 | ||
865 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 | ||
866 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 | ||
867 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 | ||
868 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 | ||
869 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48 | ||
870 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52 | ||
871 | #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL | ||
872 | #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL | ||
873 | #define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL | ||
874 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL | ||
875 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL | ||
876 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL | ||
877 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL | ||
878 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL | ||
879 | #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL | ||
880 | |||
881 | union uvh_gr1_tlb_mmr_control_u { | ||
882 | unsigned long v; | ||
883 | struct uvh_gr1_tlb_mmr_control_s { | ||
884 | unsigned long index:12; /* RW */ | ||
885 | unsigned long mem_sel:2; /* RW */ | ||
886 | unsigned long rsvd_14_15:2; | ||
887 | unsigned long auto_valid_en:1; /* RW */ | ||
888 | unsigned long rsvd_17_19:3; | ||
889 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
890 | unsigned long rsvd_21_29:9; | ||
891 | unsigned long mmr_write:1; /* WP */ | ||
892 | unsigned long mmr_read:1; /* WP */ | ||
893 | unsigned long rsvd_32_63:32; | ||
894 | } s; | ||
895 | struct uv1h_gr1_tlb_mmr_control_s { | ||
896 | unsigned long index:12; /* RW */ | ||
897 | unsigned long mem_sel:2; /* RW */ | ||
898 | unsigned long rsvd_14_15:2; | ||
899 | unsigned long auto_valid_en:1; /* RW */ | ||
900 | unsigned long rsvd_17_19:3; | ||
901 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
902 | unsigned long rsvd_21_29:9; | ||
903 | unsigned long mmr_write:1; /* WP */ | ||
904 | unsigned long mmr_read:1; /* WP */ | ||
905 | unsigned long rsvd_32_47:16; | ||
906 | unsigned long mmr_inj_con:1; /* RW */ | ||
907 | unsigned long rsvd_49_51:3; | ||
908 | unsigned long mmr_inj_tlbram:1; /* RW */ | ||
909 | unsigned long rsvd_53:1; | ||
910 | unsigned long mmr_inj_tlbpgsize:1; /* RW */ | ||
911 | unsigned long rsvd_55:1; | ||
912 | unsigned long mmr_inj_tlbrreg:1; /* RW */ | ||
913 | unsigned long rsvd_57_59:3; | ||
914 | unsigned long mmr_inj_tlblruv:1; /* RW */ | ||
915 | unsigned long rsvd_61_63:3; | ||
916 | } s1; | ||
917 | struct uv2h_gr1_tlb_mmr_control_s { | ||
918 | unsigned long index:12; /* RW */ | ||
919 | unsigned long mem_sel:2; /* RW */ | ||
920 | unsigned long rsvd_14_15:2; | ||
921 | unsigned long auto_valid_en:1; /* RW */ | ||
922 | unsigned long rsvd_17_19:3; | ||
923 | unsigned long mmr_hash_index_en:1; /* RW */ | ||
924 | unsigned long rsvd_21_29:9; | ||
925 | unsigned long mmr_write:1; /* WP */ | ||
926 | unsigned long mmr_read:1; /* WP */ | ||
927 | unsigned long mmr_op_done:1; /* RW */ | ||
928 | unsigned long rsvd_33_47:15; | ||
929 | unsigned long mmr_inj_con:1; /* RW */ | ||
930 | unsigned long rsvd_49_51:3; | ||
931 | unsigned long mmr_inj_tlbram:1; /* RW */ | ||
932 | unsigned long rsvd_53_63:11; | ||
933 | } s2; | ||
934 | }; | ||
935 | |||
936 | /* ========================================================================= */ | ||
937 | /* UVH_GR1_TLB_MMR_READ_DATA_HI */ | ||
938 | /* ========================================================================= */ | ||
939 | #define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL | ||
940 | #define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL | ||
941 | #define UVH_GR1_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ | ||
942 | UV1H_GR1_TLB_MMR_READ_DATA_HI : \ | ||
943 | UV2H_GR1_TLB_MMR_READ_DATA_HI) | ||
944 | |||
945 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 | ||
946 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 | ||
947 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 | ||
948 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 | ||
949 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL | ||
950 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL | ||
951 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL | ||
952 | #define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL | ||
953 | |||
954 | union uvh_gr1_tlb_mmr_read_data_hi_u { | ||
955 | unsigned long v; | ||
956 | struct uvh_gr1_tlb_mmr_read_data_hi_s { | ||
957 | unsigned long pfn:41; /* RO */ | ||
958 | unsigned long gaa:2; /* RO */ | ||
959 | unsigned long dirty:1; /* RO */ | ||
960 | unsigned long larger:1; /* RO */ | ||
961 | unsigned long rsvd_45_63:19; | ||
962 | } s; | ||
963 | }; | ||
964 | |||
965 | /* ========================================================================= */ | ||
966 | /* UVH_GR1_TLB_MMR_READ_DATA_LO */ | ||
967 | /* ========================================================================= */ | ||
968 | #define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL | ||
969 | #define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL | ||
970 | #define UVH_GR1_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ | ||
971 | UV1H_GR1_TLB_MMR_READ_DATA_LO : \ | ||
972 | UV2H_GR1_TLB_MMR_READ_DATA_LO) | ||
973 | |||
974 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 | ||
975 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 | ||
976 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 | ||
977 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL | ||
978 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL | ||
979 | #define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL | ||
980 | |||
981 | union uvh_gr1_tlb_mmr_read_data_lo_u { | ||
982 | unsigned long v; | ||
983 | struct uvh_gr1_tlb_mmr_read_data_lo_s { | ||
984 | unsigned long vpn:39; /* RO */ | ||
985 | unsigned long asid:24; /* RO */ | ||
986 | unsigned long valid:1; /* RO */ | ||
987 | } s; | ||
645 | }; | 988 | }; |
646 | 989 | ||
647 | /* ========================================================================= */ | 990 | /* ========================================================================= */ |
648 | /* UVH_INT_CMPB */ | 991 | /* UVH_INT_CMPB */ |
649 | /* ========================================================================= */ | 992 | /* ========================================================================= */ |
650 | #define UVH_INT_CMPB 0x22080UL | 993 | #define UVH_INT_CMPB 0x22080UL |
651 | 994 | ||
652 | #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 | 995 | #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 |
653 | #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL | 996 | #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL |
654 | 997 | ||
655 | union uvh_int_cmpb_u { | 998 | union uvh_int_cmpb_u { |
656 | unsigned long v; | 999 | unsigned long v; |
657 | struct uvh_int_cmpb_s { | 1000 | struct uvh_int_cmpb_s { |
658 | unsigned long real_time_cmpb : 56; /* RW */ | 1001 | unsigned long real_time_cmpb:56; /* RW */ |
659 | unsigned long rsvd_56_63 : 8; /* */ | 1002 | unsigned long rsvd_56_63:8; |
660 | } s; | 1003 | } s; |
661 | }; | 1004 | }; |
662 | 1005 | ||
663 | /* ========================================================================= */ | 1006 | /* ========================================================================= */ |
664 | /* UVH_INT_CMPC */ | 1007 | /* UVH_INT_CMPC */ |
665 | /* ========================================================================= */ | 1008 | /* ========================================================================= */ |
666 | #define UVH_INT_CMPC 0x22100UL | 1009 | #define UVH_INT_CMPC 0x22100UL |
667 | 1010 | ||
668 | #define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 | 1011 | #define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 |
669 | #define UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 | 1012 | #define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL |
670 | #define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT (is_uv1_hub() ? \ | ||
671 | UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT : \ | ||
672 | UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT) | ||
673 | #define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL | ||
674 | #define UV2H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL | ||
675 | #define UVH_INT_CMPC_REAL_TIME_CMPC_MASK (is_uv1_hub() ? \ | ||
676 | UV1H_INT_CMPC_REAL_TIME_CMPC_MASK : \ | ||
677 | UV2H_INT_CMPC_REAL_TIME_CMPC_MASK) | ||
678 | 1013 | ||
679 | union uvh_int_cmpc_u { | 1014 | union uvh_int_cmpc_u { |
680 | unsigned long v; | 1015 | unsigned long v; |
681 | struct uvh_int_cmpc_s { | 1016 | struct uvh_int_cmpc_s { |
682 | unsigned long real_time_cmpc : 56; /* RW */ | 1017 | unsigned long real_time_cmpc:56; /* RW */ |
683 | unsigned long rsvd_56_63 : 8; /* */ | 1018 | unsigned long rsvd_56_63:8; |
684 | } s; | 1019 | } s; |
685 | }; | 1020 | }; |
686 | 1021 | ||
687 | /* ========================================================================= */ | 1022 | /* ========================================================================= */ |
688 | /* UVH_INT_CMPD */ | 1023 | /* UVH_INT_CMPD */ |
689 | /* ========================================================================= */ | 1024 | /* ========================================================================= */ |
690 | #define UVH_INT_CMPD 0x22180UL | 1025 | #define UVH_INT_CMPD 0x22180UL |
691 | 1026 | ||
692 | #define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 | 1027 | #define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 |
693 | #define UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 | 1028 | #define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL |
694 | #define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT (is_uv1_hub() ? \ | ||
695 | UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT : \ | ||
696 | UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT) | ||
697 | #define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL | ||
698 | #define UV2H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL | ||
699 | #define UVH_INT_CMPD_REAL_TIME_CMPD_MASK (is_uv1_hub() ? \ | ||
700 | UV1H_INT_CMPD_REAL_TIME_CMPD_MASK : \ | ||
701 | UV2H_INT_CMPD_REAL_TIME_CMPD_MASK) | ||
702 | 1029 | ||
703 | union uvh_int_cmpd_u { | 1030 | union uvh_int_cmpd_u { |
704 | unsigned long v; | 1031 | unsigned long v; |
705 | struct uvh_int_cmpd_s { | 1032 | struct uvh_int_cmpd_s { |
706 | unsigned long real_time_cmpd : 56; /* RW */ | 1033 | unsigned long real_time_cmpd:56; /* RW */ |
707 | unsigned long rsvd_56_63 : 8; /* */ | 1034 | unsigned long rsvd_56_63:8; |
708 | } s; | 1035 | } s; |
709 | }; | 1036 | }; |
710 | 1037 | ||
711 | /* ========================================================================= */ | 1038 | /* ========================================================================= */ |
712 | /* UVH_IPI_INT */ | 1039 | /* UVH_IPI_INT */ |
713 | /* ========================================================================= */ | 1040 | /* ========================================================================= */ |
714 | #define UVH_IPI_INT 0x60500UL | 1041 | #define UVH_IPI_INT 0x60500UL |
715 | #define UVH_IPI_INT_32 0x348 | 1042 | #define UVH_IPI_INT_32 0x348 |
716 | 1043 | ||
717 | #define UVH_IPI_INT_VECTOR_SHFT 0 | 1044 | #define UVH_IPI_INT_VECTOR_SHFT 0 |
718 | #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL | 1045 | #define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 |
719 | #define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 | 1046 | #define UVH_IPI_INT_DESTMODE_SHFT 11 |
720 | #define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL | 1047 | #define UVH_IPI_INT_APIC_ID_SHFT 16 |
721 | #define UVH_IPI_INT_DESTMODE_SHFT 11 | 1048 | #define UVH_IPI_INT_SEND_SHFT 63 |
722 | #define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL | 1049 | #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL |
723 | #define UVH_IPI_INT_APIC_ID_SHFT 16 | 1050 | #define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL |
724 | #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL | 1051 | #define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL |
725 | #define UVH_IPI_INT_SEND_SHFT 63 | 1052 | #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL |
726 | #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL | 1053 | #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL |
727 | 1054 | ||
728 | union uvh_ipi_int_u { | 1055 | union uvh_ipi_int_u { |
729 | unsigned long v; | 1056 | unsigned long v; |
730 | struct uvh_ipi_int_s { | 1057 | struct uvh_ipi_int_s { |
731 | unsigned long vector_ : 8; /* RW */ | 1058 | unsigned long vector_:8; /* RW */ |
732 | unsigned long delivery_mode : 3; /* RW */ | 1059 | unsigned long delivery_mode:3; /* RW */ |
733 | unsigned long destmode : 1; /* RW */ | 1060 | unsigned long destmode:1; /* RW */ |
734 | unsigned long rsvd_12_15 : 4; /* */ | 1061 | unsigned long rsvd_12_15:4; |
735 | unsigned long apic_id : 32; /* RW */ | 1062 | unsigned long apic_id:32; /* RW */ |
736 | unsigned long rsvd_48_62 : 15; /* */ | 1063 | unsigned long rsvd_48_62:15; |
737 | unsigned long send : 1; /* WP */ | 1064 | unsigned long send:1; /* WP */ |
738 | } s; | 1065 | } s; |
739 | }; | 1066 | }; |
740 | 1067 | ||
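UVH_IPI_INT is unchanged apart from the reflow; the union and the *_SHFT/*_MASK macros describe the same layout. A minimal sketch of assembling an IPI value through the bitfield view, with the open-coded macro form shown for one field; the function name and the fixed/physical mode choice are illustrative:

	static unsigned long build_uv_ipi(unsigned int vector, unsigned long apicid)
	{
		union uvh_ipi_int_u ipi = { .v = 0 };

		ipi.s.vector_       = vector;
		ipi.s.delivery_mode = 0;	/* 0 = fixed delivery */
		ipi.s.destmode      = 0;	/* physical destination */
		ipi.s.apic_id       = apicid;
		ipi.s.send          = 1;	/* write-only "send" bit */
		return ipi.v;
	}

	/* Same vector field via the macros: */
	/* v |= ((unsigned long)vector << UVH_IPI_INT_VECTOR_SHFT) & UVH_IPI_INT_VECTOR_MASK; */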
741 | /* ========================================================================= */ | 1068 | /* ========================================================================= */ |
742 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ | 1069 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ |
743 | /* ========================================================================= */ | 1070 | /* ========================================================================= */ |
744 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL | 1071 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL |
745 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 | 1072 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 |
746 | 1073 | ||
747 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 | 1074 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 |
748 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL | ||
749 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 | 1075 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 |
1076 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL | ||
750 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL | 1077 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL |
751 | 1078 | ||
752 | union uvh_lb_bau_intd_payload_queue_first_u { | 1079 | union uvh_lb_bau_intd_payload_queue_first_u { |
753 | unsigned long v; | 1080 | unsigned long v; |
754 | struct uvh_lb_bau_intd_payload_queue_first_s { | 1081 | struct uvh_lb_bau_intd_payload_queue_first_s { |
755 | unsigned long rsvd_0_3: 4; /* */ | 1082 | unsigned long rsvd_0_3:4; |
756 | unsigned long address : 39; /* RW */ | 1083 | unsigned long address:39; /* RW */ |
757 | unsigned long rsvd_43_48: 6; /* */ | 1084 | unsigned long rsvd_43_48:6; |
758 | unsigned long node_id : 14; /* RW */ | 1085 | unsigned long node_id:14; /* RW */ |
759 | unsigned long rsvd_63 : 1; /* */ | 1086 | unsigned long rsvd_63:1; |
760 | } s; | 1087 | } s; |
761 | }; | 1088 | }; |
762 | 1089 | ||
763 | /* ========================================================================= */ | 1090 | /* ========================================================================= */ |
764 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ | 1091 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ |
765 | /* ========================================================================= */ | 1092 | /* ========================================================================= */ |
766 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL | 1093 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL |
767 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 | 1094 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 |
768 | 1095 | ||
769 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 | 1096 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 |
770 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL | 1097 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL |
771 | 1098 | ||
772 | union uvh_lb_bau_intd_payload_queue_last_u { | 1099 | union uvh_lb_bau_intd_payload_queue_last_u { |
773 | unsigned long v; | 1100 | unsigned long v; |
774 | struct uvh_lb_bau_intd_payload_queue_last_s { | 1101 | struct uvh_lb_bau_intd_payload_queue_last_s { |
775 | unsigned long rsvd_0_3: 4; /* */ | 1102 | unsigned long rsvd_0_3:4; |
776 | unsigned long address : 39; /* RW */ | 1103 | unsigned long address:39; /* RW */ |
777 | unsigned long rsvd_43_63: 21; /* */ | 1104 | unsigned long rsvd_43_63:21; |
778 | } s; | 1105 | } s; |
779 | }; | 1106 | }; |
780 | 1107 | ||
781 | /* ========================================================================= */ | 1108 | /* ========================================================================= */ |
782 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ | 1109 | /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ |
783 | /* ========================================================================= */ | 1110 | /* ========================================================================= */ |
784 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL | 1111 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL |
785 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 | 1112 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 |
786 | 1113 | ||
787 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 | 1114 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 |
788 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL | 1115 | #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL |
789 | 1116 | ||
790 | union uvh_lb_bau_intd_payload_queue_tail_u { | 1117 | union uvh_lb_bau_intd_payload_queue_tail_u { |
791 | unsigned long v; | 1118 | unsigned long v; |
792 | struct uvh_lb_bau_intd_payload_queue_tail_s { | 1119 | struct uvh_lb_bau_intd_payload_queue_tail_s { |
793 | unsigned long rsvd_0_3: 4; /* */ | 1120 | unsigned long rsvd_0_3:4; |
794 | unsigned long address : 39; /* RW */ | 1121 | unsigned long address:39; /* RW */ |
795 | unsigned long rsvd_43_63: 21; /* */ | 1122 | unsigned long rsvd_43_63:21; |
796 | } s; | 1123 | } s; |
797 | }; | 1124 | }; |
798 | 1125 | ||
799 | /* ========================================================================= */ | 1126 | /* ========================================================================= */ |
800 | /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ | 1127 | /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ |
801 | /* ========================================================================= */ | 1128 | /* ========================================================================= */ |
802 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL | 1129 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL |
803 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 | 1130 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 |
804 | 1131 | ||
805 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 | 1132 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 |
806 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL | ||
807 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 | 1133 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 |
808 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL | ||
809 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 | 1134 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 |
810 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL | ||
811 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 | 1135 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 |
812 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL | ||
813 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 | 1136 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 |
814 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL | ||
815 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 | 1137 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 |
816 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL | ||
817 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 | 1138 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 |
818 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL | ||
819 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 | 1139 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 |
820 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL | ||
821 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 | 1140 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 |
822 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL | ||
823 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 | 1141 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 |
824 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL | ||
825 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 | 1142 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 |
826 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL | ||
827 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 | 1143 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 |
828 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL | ||
829 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 | 1144 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 |
830 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL | ||
831 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 | 1145 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 |
832 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL | ||
833 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 | 1146 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 |
834 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL | ||
835 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 | 1147 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 |
1148 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL | ||
1149 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL | ||
1150 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL | ||
1151 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL | ||
1152 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL | ||
1153 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL | ||
1154 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL | ||
1155 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL | ||
1156 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL | ||
1157 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL | ||
1158 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL | ||
1159 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL | ||
1160 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL | ||
1161 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL | ||
1162 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL | ||
836 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL | 1163 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL |
837 | 1164 | ||
838 | union uvh_lb_bau_intd_software_acknowledge_u { | 1165 | union uvh_lb_bau_intd_software_acknowledge_u { |
839 | unsigned long v; | 1166 | unsigned long v; |
840 | struct uvh_lb_bau_intd_software_acknowledge_s { | 1167 | struct uvh_lb_bau_intd_software_acknowledge_s { |
841 | unsigned long pending_0 : 1; /* RW, W1C */ | 1168 | unsigned long pending_0:1; /* RW, W1C */ |
842 | unsigned long pending_1 : 1; /* RW, W1C */ | 1169 | unsigned long pending_1:1; /* RW, W1C */ |
843 | unsigned long pending_2 : 1; /* RW, W1C */ | 1170 | unsigned long pending_2:1; /* RW, W1C */ |
844 | unsigned long pending_3 : 1; /* RW, W1C */ | 1171 | unsigned long pending_3:1; /* RW, W1C */ |
845 | unsigned long pending_4 : 1; /* RW, W1C */ | 1172 | unsigned long pending_4:1; /* RW, W1C */ |
846 | unsigned long pending_5 : 1; /* RW, W1C */ | 1173 | unsigned long pending_5:1; /* RW, W1C */ |
847 | unsigned long pending_6 : 1; /* RW, W1C */ | 1174 | unsigned long pending_6:1; /* RW, W1C */ |
848 | unsigned long pending_7 : 1; /* RW, W1C */ | 1175 | unsigned long pending_7:1; /* RW, W1C */ |
849 | unsigned long timeout_0 : 1; /* RW, W1C */ | 1176 | unsigned long timeout_0:1; /* RW, W1C */ |
850 | unsigned long timeout_1 : 1; /* RW, W1C */ | 1177 | unsigned long timeout_1:1; /* RW, W1C */ |
851 | unsigned long timeout_2 : 1; /* RW, W1C */ | 1178 | unsigned long timeout_2:1; /* RW, W1C */ |
852 | unsigned long timeout_3 : 1; /* RW, W1C */ | 1179 | unsigned long timeout_3:1; /* RW, W1C */ |
853 | unsigned long timeout_4 : 1; /* RW, W1C */ | 1180 | unsigned long timeout_4:1; /* RW, W1C */ |
854 | unsigned long timeout_5 : 1; /* RW, W1C */ | 1181 | unsigned long timeout_5:1; /* RW, W1C */ |
855 | unsigned long timeout_6 : 1; /* RW, W1C */ | 1182 | unsigned long timeout_6:1; /* RW, W1C */ |
856 | unsigned long timeout_7 : 1; /* RW, W1C */ | 1183 | unsigned long timeout_7:1; /* RW, W1C */ |
857 | unsigned long rsvd_16_63: 48; /* */ | 1184 | unsigned long rsvd_16_63:48; |
858 | } s; | 1185 | } s; |
859 | }; | 1186 | }; |
860 | 1187 | ||
861 | /* ========================================================================= */ | 1188 | /* ========================================================================= */ |
862 | /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ | 1189 | /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ |
863 | /* ========================================================================= */ | 1190 | /* ========================================================================= */ |
864 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL | 1191 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL |
865 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 | 1192 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 |
866 | 1193 | ||
867 | /* ========================================================================= */ | 1194 | /* ========================================================================= */ |
868 | /* UVH_LB_BAU_MISC_CONTROL */ | 1195 | /* UVH_LB_BAU_MISC_CONTROL */ |
869 | /* ========================================================================= */ | 1196 | /* ========================================================================= */ |
870 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | 1197 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL |
871 | #define UVH_LB_BAU_MISC_CONTROL_32 0xa10 | 1198 | #define UVH_LB_BAU_MISC_CONTROL_32 0xa10 |
872 | 1199 | ||
873 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | 1200 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 |
874 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | 1201 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 |
875 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | 1202 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 |
876 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | 1203 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 |
877 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
878 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
879 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
880 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
881 | #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 | 1204 | #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 |
882 | #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
883 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | 1205 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 |
884 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
885 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | 1206 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 |
886 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
887 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | 1207 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 |
888 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
889 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | 1208 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 |
890 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
891 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | 1209 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 |
892 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
893 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | 1210 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 |
894 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
895 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | 1211 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 |
896 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
897 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | 1212 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 |
898 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
899 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | 1213 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 |
900 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
901 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | 1214 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 |
1215 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
1216 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
1217 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
1218 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
1219 | #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
1220 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
1221 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
1222 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
1223 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
1224 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
1225 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
1226 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
1227 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
1228 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
902 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | 1229 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL |
903 | 1230 | ||
904 | #define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | 1231 | #define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 |
905 | #define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | 1232 | #define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 |
906 | #define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | 1233 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 |
907 | #define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | 1234 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 |
908 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
909 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
910 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
911 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
912 | #define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 | 1235 | #define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 |
913 | #define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
914 | #define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | 1236 | #define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 |
915 | #define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
916 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | 1237 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 |
917 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
918 | #define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | 1238 | #define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 |
919 | #define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
920 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | 1239 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 |
921 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
922 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | 1240 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 |
923 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
924 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | 1241 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 |
925 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
926 | #define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | 1242 | #define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 |
927 | #define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
928 | #define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | 1243 | #define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 |
929 | #define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
930 | #define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | 1244 | #define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 |
931 | #define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
932 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | 1245 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 |
1246 | #define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | ||
1247 | #define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
1248 | #define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
1249 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
1250 | #define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
1251 | #define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
1252 | #define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
1253 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
1254 | #define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
1255 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
1256 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
1257 | #define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
1258 | #define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
1259 | #define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
1260 | #define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
933 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | 1261 | #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL |
934 | #define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | 1262 | #define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL |
935 | #define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL | 1263 | |
936 | 1264 | #define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | |
937 | #define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | 1265 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 |
938 | #define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | 1266 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 |
939 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | 1267 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 |
940 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
941 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
942 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
943 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
944 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
945 | #define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 | 1268 | #define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 |
946 | #define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
947 | #define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | 1269 | #define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 |
948 | #define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
949 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | 1270 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 |
950 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
951 | #define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | 1271 | #define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 |
952 | #define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
953 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | 1272 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 |
954 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
955 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | 1273 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 |
956 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
957 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | 1274 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 |
958 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
959 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | 1275 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 |
960 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
961 | #define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | 1276 | #define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 |
962 | #define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
963 | #define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | 1277 | #define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 |
964 | #define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
965 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | 1278 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 |
966 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | ||
967 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 | 1279 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 |
968 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL | 1280 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 |
969 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 | ||
970 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL | ||
971 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 | 1281 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 |
972 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL | ||
973 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 | 1282 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 |
974 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL | ||
975 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 | 1283 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 |
976 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL | ||
977 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 | 1284 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 |
978 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL | ||
979 | #define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 | 1285 | #define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 |
1286 | #define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | ||
1287 | #define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
1288 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
1289 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
1290 | #define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
1291 | #define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
1292 | #define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
1293 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
1294 | #define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
1295 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
1296 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
1297 | #define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
1298 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
1299 | #define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
1300 | #define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
1301 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | ||
1302 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL | ||
1303 | #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL | ||
1304 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL | ||
1305 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL | ||
1306 | #define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL | ||
1307 | #define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL | ||
980 | #define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL | 1308 | #define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL |
981 | #define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | 1309 | #define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL |
982 | #define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL | ||
983 | 1310 | ||
984 | union uvh_lb_bau_misc_control_u { | 1311 | union uvh_lb_bau_misc_control_u { |
985 | unsigned long v; | 1312 | unsigned long v; |
986 | struct uvh_lb_bau_misc_control_s { | 1313 | struct uvh_lb_bau_misc_control_s { |
987 | unsigned long rejection_delay : 8; /* RW */ | 1314 | unsigned long rejection_delay:8; /* RW */ |
988 | unsigned long apic_mode : 1; /* RW */ | 1315 | unsigned long apic_mode:1; /* RW */ |
989 | unsigned long force_broadcast : 1; /* RW */ | 1316 | unsigned long force_broadcast:1; /* RW */ |
990 | unsigned long force_lock_nop : 1; /* RW */ | 1317 | unsigned long force_lock_nop:1; /* RW */ |
991 | unsigned long qpi_agent_presence_vector : 3; /* RW */ | 1318 | unsigned long qpi_agent_presence_vector:3; /* RW */ |
992 | unsigned long descriptor_fetch_mode : 1; /* RW */ | 1319 | unsigned long descriptor_fetch_mode:1; /* RW */ |
993 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | 1320 | unsigned long enable_intd_soft_ack_mode:1; /* RW */ |
994 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | 1321 | unsigned long intd_soft_ack_timeout_period:4; /* RW */ |
995 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | 1322 | unsigned long enable_dual_mapping_mode:1; /* RW */ |
996 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | 1323 | unsigned long vga_io_port_decode_enable:1; /* RW */ |
997 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | 1324 | unsigned long vga_io_port_16_bit_decode:1; /* RW */ |
998 | unsigned long suppress_dest_registration : 1; /* RW */ | 1325 | unsigned long suppress_dest_registration:1; /* RW */ |
999 | unsigned long programmed_initial_priority : 3; /* RW */ | 1326 | unsigned long programmed_initial_priority:3; /* RW */ |
1000 | unsigned long use_incoming_priority : 1; /* RW */ | 1327 | unsigned long use_incoming_priority:1; /* RW */ |
1001 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | 1328 | unsigned long enable_programmed_initial_priority:1;/* RW */ |
1002 | unsigned long rsvd_29_63 : 35; | 1329 | unsigned long rsvd_29_63:35; |
1003 | } s; | 1330 | } s; |
1004 | struct uv1h_lb_bau_misc_control_s { | 1331 | struct uv1h_lb_bau_misc_control_s { |
1005 | unsigned long rejection_delay : 8; /* RW */ | 1332 | unsigned long rejection_delay:8; /* RW */ |
1006 | unsigned long apic_mode : 1; /* RW */ | 1333 | unsigned long apic_mode:1; /* RW */ |
1007 | unsigned long force_broadcast : 1; /* RW */ | 1334 | unsigned long force_broadcast:1; /* RW */ |
1008 | unsigned long force_lock_nop : 1; /* RW */ | 1335 | unsigned long force_lock_nop:1; /* RW */ |
1009 | unsigned long qpi_agent_presence_vector : 3; /* RW */ | 1336 | unsigned long qpi_agent_presence_vector:3; /* RW */ |
1010 | unsigned long descriptor_fetch_mode : 1; /* RW */ | 1337 | unsigned long descriptor_fetch_mode:1; /* RW */ |
1011 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | 1338 | unsigned long enable_intd_soft_ack_mode:1; /* RW */ |
1012 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | 1339 | unsigned long intd_soft_ack_timeout_period:4; /* RW */ |
1013 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | 1340 | unsigned long enable_dual_mapping_mode:1; /* RW */ |
1014 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | 1341 | unsigned long vga_io_port_decode_enable:1; /* RW */ |
1015 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | 1342 | unsigned long vga_io_port_16_bit_decode:1; /* RW */ |
1016 | unsigned long suppress_dest_registration : 1; /* RW */ | 1343 | unsigned long suppress_dest_registration:1; /* RW */ |
1017 | unsigned long programmed_initial_priority : 3; /* RW */ | 1344 | unsigned long programmed_initial_priority:3; /* RW */ |
1018 | unsigned long use_incoming_priority : 1; /* RW */ | 1345 | unsigned long use_incoming_priority:1; /* RW */ |
1019 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | 1346 | unsigned long enable_programmed_initial_priority:1;/* RW */ |
1020 | unsigned long rsvd_29_47 : 19; /* */ | 1347 | unsigned long rsvd_29_47:19; |
1021 | unsigned long fun : 16; /* RW */ | 1348 | unsigned long fun:16; /* RW */ |
1022 | } s1; | 1349 | } s1; |
1023 | struct uv2h_lb_bau_misc_control_s { | 1350 | struct uv2h_lb_bau_misc_control_s { |
1024 | unsigned long rejection_delay : 8; /* RW */ | 1351 | unsigned long rejection_delay:8; /* RW */ |
1025 | unsigned long apic_mode : 1; /* RW */ | 1352 | unsigned long apic_mode:1; /* RW */ |
1026 | unsigned long force_broadcast : 1; /* RW */ | 1353 | unsigned long force_broadcast:1; /* RW */ |
1027 | unsigned long force_lock_nop : 1; /* RW */ | 1354 | unsigned long force_lock_nop:1; /* RW */ |
1028 | unsigned long qpi_agent_presence_vector : 3; /* RW */ | 1355 | unsigned long qpi_agent_presence_vector:3; /* RW */ |
1029 | unsigned long descriptor_fetch_mode : 1; /* RW */ | 1356 | unsigned long descriptor_fetch_mode:1; /* RW */ |
1030 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | 1357 | unsigned long enable_intd_soft_ack_mode:1; /* RW */ |
1031 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | 1358 | unsigned long intd_soft_ack_timeout_period:4; /* RW */ |
1032 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | 1359 | unsigned long enable_dual_mapping_mode:1; /* RW */ |
1033 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | 1360 | unsigned long vga_io_port_decode_enable:1; /* RW */ |
1034 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | 1361 | unsigned long vga_io_port_16_bit_decode:1; /* RW */ |
1035 | unsigned long suppress_dest_registration : 1; /* RW */ | 1362 | unsigned long suppress_dest_registration:1; /* RW */ |
1036 | unsigned long programmed_initial_priority : 3; /* RW */ | 1363 | unsigned long programmed_initial_priority:3; /* RW */ |
1037 | unsigned long use_incoming_priority : 1; /* RW */ | 1364 | unsigned long use_incoming_priority:1; /* RW */ |
1038 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | 1365 | unsigned long enable_programmed_initial_priority:1;/* RW */ |
1039 | unsigned long enable_automatic_apic_mode_selection : 1; /* RW */ | 1366 | unsigned long enable_automatic_apic_mode_selection:1;/* RW */ |
1040 | unsigned long apic_mode_status : 1; /* RO */ | 1367 | unsigned long apic_mode_status:1; /* RO */ |
1041 | unsigned long suppress_interrupts_to_self : 1; /* RW */ | 1368 | unsigned long suppress_interrupts_to_self:1; /* RW */ |
1042 | unsigned long enable_lock_based_system_flush : 1; /* RW */ | 1369 | unsigned long enable_lock_based_system_flush:1;/* RW */ |
1043 | unsigned long enable_extended_sb_status : 1; /* RW */ | 1370 | unsigned long enable_extended_sb_status:1; /* RW */ |
1044 | unsigned long suppress_int_prio_udt_to_self : 1; /* RW */ | 1371 | unsigned long suppress_int_prio_udt_to_self:1;/* RW */ |
1045 | unsigned long use_legacy_descriptor_formats : 1; /* RW */ | 1372 | unsigned long use_legacy_descriptor_formats:1;/* RW */ |
1046 | unsigned long rsvd_36_47 : 12; /* */ | 1373 | unsigned long rsvd_36_47:12; |
1047 | unsigned long fun : 16; /* RW */ | 1374 | unsigned long fun:16; /* RW */ |
1048 | } s2; | 1375 | } s2; |
1049 | }; | 1376 | }; |
1050 | 1377 | ||
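The misc-control register keeps the per-hub (UVH_), UV1-only (UV1H_) and UV2-only (UV2H_) macro sets side by side; fields present on both generations can be handled with the generic UVH_ names. A sketch of a read-modify-write of one such field follows; uv_read_local_mmr()/uv_write_local_mmr() are assumed to be the usual accessors from <asm/uv/uv_hub.h> and are not defined in this hunk.

/* Sketch: update the soft-ack timeout field using the generic macros. */
static void uv_set_soft_ack_timeout(unsigned long period)
{
	unsigned long mc = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);

	mc &= ~UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK;
	mc |= (period << UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT) &
	      UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK;
	uv_write_local_mmr(UVH_LB_BAU_MISC_CONTROL, mc);
}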
1051 | /* ========================================================================= */ | 1378 | /* ========================================================================= */ |
1052 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ | 1379 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ |
1053 | /* ========================================================================= */ | 1380 | /* ========================================================================= */ |
1054 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL | 1381 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL |
1055 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 | 1382 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 |
1056 | 1383 | ||
1057 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 | 1384 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 |
1058 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL | 1385 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 |
1059 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 | 1386 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 |
1060 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL | 1387 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL |
1061 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 | 1388 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL |
1062 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL | 1389 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL |
1063 | 1390 | ||
1064 | union uvh_lb_bau_sb_activation_control_u { | 1391 | union uvh_lb_bau_sb_activation_control_u { |
1065 | unsigned long v; | 1392 | unsigned long v; |
1066 | struct uvh_lb_bau_sb_activation_control_s { | 1393 | struct uvh_lb_bau_sb_activation_control_s { |
1067 | unsigned long index : 6; /* RW */ | 1394 | unsigned long index:6; /* RW */ |
1068 | unsigned long rsvd_6_61: 56; /* */ | 1395 | unsigned long rsvd_6_61:56; |
1069 | unsigned long push : 1; /* WP */ | 1396 | unsigned long push:1; /* WP */ |
1070 | unsigned long init : 1; /* WP */ | 1397 | unsigned long init:1; /* WP */ |
1071 | } s; | 1398 | } s; |
1072 | }; | 1399 | }; |
1073 | 1400 | ||
1074 | /* ========================================================================= */ | 1401 | /* ========================================================================= */ |
1075 | /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ | 1402 | /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ |
1076 | /* ========================================================================= */ | 1403 | /* ========================================================================= */ |
1077 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL | 1404 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL |
1078 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 | 1405 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 |
1079 | 1406 | ||
1080 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 | 1407 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 |
1081 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL | 1408 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL |
1082 | 1409 | ||
1083 | union uvh_lb_bau_sb_activation_status_0_u { | 1410 | union uvh_lb_bau_sb_activation_status_0_u { |
1084 | unsigned long v; | 1411 | unsigned long v; |
1085 | struct uvh_lb_bau_sb_activation_status_0_s { | 1412 | struct uvh_lb_bau_sb_activation_status_0_s { |
1086 | unsigned long status : 64; /* RW */ | 1413 | unsigned long status:64; /* RW */ |
1087 | } s; | 1414 | } s; |
1088 | }; | 1415 | }; |
1089 | 1416 | ||
1090 | /* ========================================================================= */ | 1417 | /* ========================================================================= */ |
1091 | /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ | 1418 | /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ |
1092 | /* ========================================================================= */ | 1419 | /* ========================================================================= */ |
1093 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL | 1420 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL |
1094 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 | 1421 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 |
1095 | 1422 | ||
1096 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 | 1423 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 |
1097 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL | 1424 | #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL |
1098 | 1425 | ||
1099 | union uvh_lb_bau_sb_activation_status_1_u { | 1426 | union uvh_lb_bau_sb_activation_status_1_u { |
1100 | unsigned long v; | 1427 | unsigned long v; |
1101 | struct uvh_lb_bau_sb_activation_status_1_s { | 1428 | struct uvh_lb_bau_sb_activation_status_1_s { |
1102 | unsigned long status : 64; /* RW */ | 1429 | unsigned long status:64; /* RW */ |
1103 | } s; | 1430 | } s; |
1104 | }; | 1431 | }; |
1105 | 1432 | ||
1106 | /* ========================================================================= */ | 1433 | /* ========================================================================= */ |
1107 | /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ | 1434 | /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ |
1108 | /* ========================================================================= */ | 1435 | /* ========================================================================= */ |
1109 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL | 1436 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL |
1110 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 | 1437 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 |
1111 | 1438 | ||
1112 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 | 1439 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 |
1113 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL | 1440 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 |
1114 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 | 1441 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL |
1115 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL | 1442 | #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL |
1116 | 1443 | ||
1117 | union uvh_lb_bau_sb_descriptor_base_u { | 1444 | union uvh_lb_bau_sb_descriptor_base_u { |
1118 | unsigned long v; | 1445 | unsigned long v; |
1119 | struct uvh_lb_bau_sb_descriptor_base_s { | 1446 | struct uvh_lb_bau_sb_descriptor_base_s { |
1120 | unsigned long rsvd_0_11 : 12; /* */ | 1447 | unsigned long rsvd_0_11:12; |
1121 | unsigned long page_address : 31; /* RW */ | 1448 | unsigned long page_address:31; /* RW */ |
1122 | unsigned long rsvd_43_48 : 6; /* */ | 1449 | unsigned long rsvd_43_48:6; |
1123 | unsigned long node_id : 14; /* RW */ | 1450 | unsigned long node_id:14; /* RW */ |
1124 | unsigned long rsvd_63 : 1; /* */ | 1451 | unsigned long rsvd_63:1; |
1125 | } s; | 1452 | } s; |
1126 | }; | 1453 | }; |
1127 | 1454 | ||
1128 | /* ========================================================================= */ | 1455 | /* ========================================================================= */ |
1129 | /* UVH_NODE_ID */ | 1456 | /* UVH_NODE_ID */ |
1130 | /* ========================================================================= */ | 1457 | /* ========================================================================= */ |
1131 | #define UVH_NODE_ID 0x0UL | 1458 | #define UVH_NODE_ID 0x0UL |
1132 | 1459 | ||
1133 | #define UVH_NODE_ID_FORCE1_SHFT 0 | 1460 | #define UVH_NODE_ID_FORCE1_SHFT 0 |
1134 | #define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL | 1461 | #define UVH_NODE_ID_MANUFACTURER_SHFT 1 |
1135 | #define UVH_NODE_ID_MANUFACTURER_SHFT 1 | 1462 | #define UVH_NODE_ID_PART_NUMBER_SHFT 12 |
1136 | #define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL | 1463 | #define UVH_NODE_ID_REVISION_SHFT 28 |
1137 | #define UVH_NODE_ID_PART_NUMBER_SHFT 12 | 1464 | #define UVH_NODE_ID_NODE_ID_SHFT 32 |
1138 | #define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL | 1465 | #define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL |
1139 | #define UVH_NODE_ID_REVISION_SHFT 28 | 1466 | #define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL |
1140 | #define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL | 1467 | #define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL |
1141 | #define UVH_NODE_ID_NODE_ID_SHFT 32 | 1468 | #define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL |
1142 | #define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL | 1469 | #define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL |
1143 | 1470 | ||
1144 | #define UV1H_NODE_ID_FORCE1_SHFT 0 | 1471 | #define UV1H_NODE_ID_FORCE1_SHFT 0 |
1145 | #define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL | 1472 | #define UV1H_NODE_ID_MANUFACTURER_SHFT 1 |
1146 | #define UV1H_NODE_ID_MANUFACTURER_SHFT 1 | 1473 | #define UV1H_NODE_ID_PART_NUMBER_SHFT 12 |
1147 | #define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL | 1474 | #define UV1H_NODE_ID_REVISION_SHFT 28 |
1148 | #define UV1H_NODE_ID_PART_NUMBER_SHFT 12 | 1475 | #define UV1H_NODE_ID_NODE_ID_SHFT 32 |
1149 | #define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL | 1476 | #define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 |
1150 | #define UV1H_NODE_ID_REVISION_SHFT 28 | 1477 | #define UV1H_NODE_ID_NI_PORT_SHFT 56 |
1151 | #define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL | 1478 | #define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL |
1152 | #define UV1H_NODE_ID_NODE_ID_SHFT 32 | 1479 | #define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL |
1153 | #define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL | 1480 | #define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL |
1154 | #define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 | 1481 | #define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL |
1155 | #define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL | 1482 | #define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL |
1156 | #define UV1H_NODE_ID_NI_PORT_SHFT 56 | 1483 | #define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL |
1157 | #define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL | 1484 | #define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL |
1158 | 1485 | ||
1159 | #define UV2H_NODE_ID_FORCE1_SHFT 0 | 1486 | #define UV2H_NODE_ID_FORCE1_SHFT 0 |
1160 | #define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL | 1487 | #define UV2H_NODE_ID_MANUFACTURER_SHFT 1 |
1161 | #define UV2H_NODE_ID_MANUFACTURER_SHFT 1 | 1488 | #define UV2H_NODE_ID_PART_NUMBER_SHFT 12 |
1162 | #define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL | 1489 | #define UV2H_NODE_ID_REVISION_SHFT 28 |
1163 | #define UV2H_NODE_ID_PART_NUMBER_SHFT 12 | 1490 | #define UV2H_NODE_ID_NODE_ID_SHFT 32 |
1164 | #define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL | 1491 | #define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 |
1165 | #define UV2H_NODE_ID_REVISION_SHFT 28 | 1492 | #define UV2H_NODE_ID_NI_PORT_SHFT 57 |
1166 | #define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL | 1493 | #define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL |
1167 | #define UV2H_NODE_ID_NODE_ID_SHFT 32 | 1494 | #define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL |
1168 | #define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL | 1495 | #define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL |
1169 | #define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 | 1496 | #define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL |
1170 | #define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL | 1497 | #define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL |
1171 | #define UV2H_NODE_ID_NI_PORT_SHFT 57 | 1498 | #define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL |
1172 | #define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL | 1499 | #define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL |
1173 | 1500 | ||
1174 | union uvh_node_id_u { | 1501 | union uvh_node_id_u { |
1175 | unsigned long v; | 1502 | unsigned long v; |
1176 | struct uvh_node_id_s { | 1503 | struct uvh_node_id_s { |
1177 | unsigned long force1 : 1; /* RO */ | 1504 | unsigned long force1:1; /* RO */ |
1178 | unsigned long manufacturer : 11; /* RO */ | 1505 | unsigned long manufacturer:11; /* RO */ |
1179 | unsigned long part_number : 16; /* RO */ | 1506 | unsigned long part_number:16; /* RO */ |
1180 | unsigned long revision : 4; /* RO */ | 1507 | unsigned long revision:4; /* RO */ |
1181 | unsigned long node_id : 15; /* RW */ | 1508 | unsigned long node_id:15; /* RW */ |
1182 | unsigned long rsvd_47_63 : 17; | 1509 | unsigned long rsvd_47_63:17; |
1183 | } s; | 1510 | } s; |
1184 | struct uv1h_node_id_s { | 1511 | struct uv1h_node_id_s { |
1185 | unsigned long force1 : 1; /* RO */ | 1512 | unsigned long force1:1; /* RO */ |
1186 | unsigned long manufacturer : 11; /* RO */ | 1513 | unsigned long manufacturer:11; /* RO */ |
1187 | unsigned long part_number : 16; /* RO */ | 1514 | unsigned long part_number:16; /* RO */ |
1188 | unsigned long revision : 4; /* RO */ | 1515 | unsigned long revision:4; /* RO */ |
1189 | unsigned long node_id : 15; /* RW */ | 1516 | unsigned long node_id:15; /* RW */ |
1190 | unsigned long rsvd_47 : 1; /* */ | 1517 | unsigned long rsvd_47:1; |
1191 | unsigned long nodes_per_bit : 7; /* RW */ | 1518 | unsigned long nodes_per_bit:7; /* RW */ |
1192 | unsigned long rsvd_55 : 1; /* */ | 1519 | unsigned long rsvd_55:1; |
1193 | unsigned long ni_port : 4; /* RO */ | 1520 | unsigned long ni_port:4; /* RO */ |
1194 | unsigned long rsvd_60_63 : 4; /* */ | 1521 | unsigned long rsvd_60_63:4; |
1195 | } s1; | 1522 | } s1; |
1196 | struct uv2h_node_id_s { | 1523 | struct uv2h_node_id_s { |
1197 | unsigned long force1 : 1; /* RO */ | 1524 | unsigned long force1:1; /* RO */ |
1198 | unsigned long manufacturer : 11; /* RO */ | 1525 | unsigned long manufacturer:11; /* RO */ |
1199 | unsigned long part_number : 16; /* RO */ | 1526 | unsigned long part_number:16; /* RO */ |
1200 | unsigned long revision : 4; /* RO */ | 1527 | unsigned long revision:4; /* RO */ |
1201 | unsigned long node_id : 15; /* RW */ | 1528 | unsigned long node_id:15; /* RW */ |
1202 | unsigned long rsvd_47_49 : 3; /* */ | 1529 | unsigned long rsvd_47_49:3; |
1203 | unsigned long nodes_per_bit : 7; /* RO */ | 1530 | unsigned long nodes_per_bit:7; /* RO */ |
1204 | unsigned long ni_port : 5; /* RO */ | 1531 | unsigned long ni_port:5; /* RO */ |
1205 | unsigned long rsvd_62_63 : 2; /* */ | 1532 | unsigned long rsvd_62_63:2; |
1206 | } s2; | 1533 | } s2; |
1207 | }; | 1534 | }; |
1208 | 1535 | ||
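As with the registers above, UVH_NODE_ID carries a generic field set plus UV1/UV2-specific extensions in the upper bits; the node_id field itself sits at the same position in all three views. A small sketch of decoding it from a raw register value, using only macros defined above:

/* Sketch: extract the node_id field (bits 46:32) from a raw UVH_NODE_ID value. */
static inline int uv_node_id_from_mmr(unsigned long node_id_mmr)
{
	return (node_id_mmr & UVH_NODE_ID_NODE_ID_MASK) >>
	       UVH_NODE_ID_NODE_ID_SHFT;
}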
1209 | /* ========================================================================= */ | 1536 | /* ========================================================================= */ |
1210 | /* UVH_NODE_PRESENT_TABLE */ | 1537 | /* UVH_NODE_PRESENT_TABLE */ |
1211 | /* ========================================================================= */ | 1538 | /* ========================================================================= */ |
1212 | #define UVH_NODE_PRESENT_TABLE 0x1400UL | 1539 | #define UVH_NODE_PRESENT_TABLE 0x1400UL |
1213 | #define UVH_NODE_PRESENT_TABLE_DEPTH 16 | 1540 | #define UVH_NODE_PRESENT_TABLE_DEPTH 16 |
1214 | 1541 | ||
1215 | #define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 | 1542 | #define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 |
1216 | #define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL | 1543 | #define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL |
1217 | 1544 | ||
1218 | union uvh_node_present_table_u { | 1545 | union uvh_node_present_table_u { |
1219 | unsigned long v; | 1546 | unsigned long v; |
1220 | struct uvh_node_present_table_s { | 1547 | struct uvh_node_present_table_s { |
1221 | unsigned long nodes : 64; /* RW */ | 1548 | unsigned long nodes:64; /* RW */ |
1222 | } s; | 1549 | } s; |
1223 | }; | 1550 | }; |
1224 | 1551 | ||
1225 | /* ========================================================================= */ | 1552 | /* ========================================================================= */ |
1226 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ | 1553 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ |
1227 | /* ========================================================================= */ | 1554 | /* ========================================================================= */ |
1228 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL | 1555 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL |
1229 | 1556 | ||
1230 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 | 1557 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 |
1231 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL | ||
1232 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 | 1558 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 |
1233 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1234 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 | 1559 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 |
1560 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL | ||
1561 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1235 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL | 1562 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL |
1236 | 1563 | ||
1237 | union uvh_rh_gam_alias210_overlay_config_0_mmr_u { | 1564 | union uvh_rh_gam_alias210_overlay_config_0_mmr_u { |
1238 | unsigned long v; | 1565 | unsigned long v; |
1239 | struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { | 1566 | struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { |
1240 | unsigned long rsvd_0_23: 24; /* */ | 1567 | unsigned long rsvd_0_23:24; |
1241 | unsigned long base : 8; /* RW */ | 1568 | unsigned long base:8; /* RW */ |
1242 | unsigned long rsvd_32_47: 16; /* */ | 1569 | unsigned long rsvd_32_47:16; |
1243 | unsigned long m_alias : 5; /* RW */ | 1570 | unsigned long m_alias:5; /* RW */ |
1244 | unsigned long rsvd_53_62: 10; /* */ | 1571 | unsigned long rsvd_53_62:10; |
1245 | unsigned long enable : 1; /* RW */ | 1572 | unsigned long enable:1; /* RW */ |
1246 | } s; | 1573 | } s; |
1247 | }; | 1574 | }; |
1248 | 1575 | ||
1249 | /* ========================================================================= */ | 1576 | /* ========================================================================= */ |
1250 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ | 1577 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ |
1251 | /* ========================================================================= */ | 1578 | /* ========================================================================= */ |
1252 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL | 1579 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL |
1253 | 1580 | ||
1254 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 | 1581 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 |
1255 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL | ||
1256 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 | 1582 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 |
1257 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1258 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 | 1583 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 |
1584 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL | ||
1585 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1259 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL | 1586 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL |
1260 | 1587 | ||
1261 | union uvh_rh_gam_alias210_overlay_config_1_mmr_u { | 1588 | union uvh_rh_gam_alias210_overlay_config_1_mmr_u { |
1262 | unsigned long v; | 1589 | unsigned long v; |
1263 | struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { | 1590 | struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { |
1264 | unsigned long rsvd_0_23: 24; /* */ | 1591 | unsigned long rsvd_0_23:24; |
1265 | unsigned long base : 8; /* RW */ | 1592 | unsigned long base:8; /* RW */ |
1266 | unsigned long rsvd_32_47: 16; /* */ | 1593 | unsigned long rsvd_32_47:16; |
1267 | unsigned long m_alias : 5; /* RW */ | 1594 | unsigned long m_alias:5; /* RW */ |
1268 | unsigned long rsvd_53_62: 10; /* */ | 1595 | unsigned long rsvd_53_62:10; |
1269 | unsigned long enable : 1; /* RW */ | 1596 | unsigned long enable:1; /* RW */ |
1270 | } s; | 1597 | } s; |
1271 | }; | 1598 | }; |
1272 | 1599 | ||
1273 | /* ========================================================================= */ | 1600 | /* ========================================================================= */ |
1274 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ | 1601 | /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ |
1275 | /* ========================================================================= */ | 1602 | /* ========================================================================= */ |
1276 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL | 1603 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL |
1277 | 1604 | ||
1278 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 | 1605 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 |
1279 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL | ||
1280 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 | 1606 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 |
1281 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1282 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 | 1607 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 |
1608 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL | ||
1609 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL | ||
1283 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL | 1610 | #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL |
1284 | 1611 | ||
1285 | union uvh_rh_gam_alias210_overlay_config_2_mmr_u { | 1612 | union uvh_rh_gam_alias210_overlay_config_2_mmr_u { |
1286 | unsigned long v; | 1613 | unsigned long v; |
1287 | struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { | 1614 | struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { |
1288 | unsigned long rsvd_0_23: 24; /* */ | 1615 | unsigned long rsvd_0_23:24; |
1289 | unsigned long base : 8; /* RW */ | 1616 | unsigned long base:8; /* RW */ |
1290 | unsigned long rsvd_32_47: 16; /* */ | 1617 | unsigned long rsvd_32_47:16; |
1291 | unsigned long m_alias : 5; /* RW */ | 1618 | unsigned long m_alias:5; /* RW */ |
1292 | unsigned long rsvd_53_62: 10; /* */ | 1619 | unsigned long rsvd_53_62:10; |
1293 | unsigned long enable : 1; /* RW */ | 1620 | unsigned long enable:1; /* RW */ |
1294 | } s; | 1621 | } s; |
1295 | }; | 1622 | }; |
1296 | 1623 | ||
1297 | /* ========================================================================= */ | 1624 | /* ========================================================================= */ |
1298 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ | 1625 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ |
1299 | /* ========================================================================= */ | 1626 | /* ========================================================================= */ |
1300 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL | 1627 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL |
1301 | 1628 | ||
1302 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 | 1629 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 |
1303 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL | 1630 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL |
1304 | 1631 | ||
1305 | union uvh_rh_gam_alias210_redirect_config_0_mmr_u { | 1632 | union uvh_rh_gam_alias210_redirect_config_0_mmr_u { |
1306 | unsigned long v; | 1633 | unsigned long v; |
1307 | struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { | 1634 | struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { |
1308 | unsigned long rsvd_0_23 : 24; /* */ | 1635 | unsigned long rsvd_0_23:24; |
1309 | unsigned long dest_base : 22; /* RW */ | 1636 | unsigned long dest_base:22; /* RW */ |
1310 | unsigned long rsvd_46_63: 18; /* */ | 1637 | unsigned long rsvd_46_63:18; |
1311 | } s; | 1638 | } s; |
1312 | }; | 1639 | }; |
1313 | 1640 | ||
1314 | /* ========================================================================= */ | 1641 | /* ========================================================================= */ |
1315 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ | 1642 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ |
1316 | /* ========================================================================= */ | 1643 | /* ========================================================================= */ |
1317 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL | 1644 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL |
1318 | 1645 | ||
1319 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 | 1646 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 |
1320 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL | 1647 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL |
1321 | 1648 | ||
1322 | union uvh_rh_gam_alias210_redirect_config_1_mmr_u { | 1649 | union uvh_rh_gam_alias210_redirect_config_1_mmr_u { |
1323 | unsigned long v; | 1650 | unsigned long v; |
1324 | struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { | 1651 | struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { |
1325 | unsigned long rsvd_0_23 : 24; /* */ | 1652 | unsigned long rsvd_0_23:24; |
1326 | unsigned long dest_base : 22; /* RW */ | 1653 | unsigned long dest_base:22; /* RW */ |
1327 | unsigned long rsvd_46_63: 18; /* */ | 1654 | unsigned long rsvd_46_63:18; |
1328 | } s; | 1655 | } s; |
1329 | }; | 1656 | }; |
1330 | 1657 | ||
1331 | /* ========================================================================= */ | 1658 | /* ========================================================================= */ |
1332 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ | 1659 | /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ |
1333 | /* ========================================================================= */ | 1660 | /* ========================================================================= */ |
1334 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL | 1661 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL |
1335 | 1662 | ||
1336 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 | 1663 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 |
1337 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL | 1664 | #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL |
1338 | 1665 | ||
1339 | union uvh_rh_gam_alias210_redirect_config_2_mmr_u { | 1666 | union uvh_rh_gam_alias210_redirect_config_2_mmr_u { |
1340 | unsigned long v; | 1667 | unsigned long v; |
1341 | struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { | 1668 | struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { |
1342 | unsigned long rsvd_0_23 : 24; /* */ | 1669 | unsigned long rsvd_0_23:24; |
1343 | unsigned long dest_base : 22; /* RW */ | 1670 | unsigned long dest_base:22; /* RW */ |
1344 | unsigned long rsvd_46_63: 18; /* */ | 1671 | unsigned long rsvd_46_63:18; |
1345 | } s; | 1672 | } s; |
1346 | }; | 1673 | }; |
1347 | 1674 | ||
1348 | /* ========================================================================= */ | 1675 | /* ========================================================================= */ |
1349 | /* UVH_RH_GAM_CONFIG_MMR */ | 1676 | /* UVH_RH_GAM_CONFIG_MMR */ |
1350 | /* ========================================================================= */ | 1677 | /* ========================================================================= */ |
1351 | #define UVH_RH_GAM_CONFIG_MMR 0x1600000UL | 1678 | #define UVH_RH_GAM_CONFIG_MMR 0x1600000UL |
1352 | 1679 | ||
1353 | #define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 | 1680 | #define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 |
1354 | #define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL | 1681 | #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 |
1355 | #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 | 1682 | #define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL |
1356 | #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL | 1683 | #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL |
1357 | 1684 | ||
1358 | #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 | 1685 | #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 |
1359 | #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL | 1686 | #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 |
1360 | #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 | 1687 | #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 |
1361 | #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL | 1688 | #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL |
1362 | #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 | 1689 | #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL |
1363 | #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL | 1690 | #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL |
1364 | 1691 | ||
1365 | #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 | 1692 | #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 |
1366 | #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL | 1693 | #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 |
1367 | #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 | 1694 | #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL |
1368 | #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL | 1695 | #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL |
1369 | 1696 | ||
1370 | union uvh_rh_gam_config_mmr_u { | 1697 | union uvh_rh_gam_config_mmr_u { |
1371 | unsigned long v; | 1698 | unsigned long v; |
1372 | struct uvh_rh_gam_config_mmr_s { | 1699 | struct uvh_rh_gam_config_mmr_s { |
1373 | unsigned long m_skt : 6; /* RW */ | 1700 | unsigned long m_skt:6; /* RW */ |
1374 | unsigned long n_skt : 4; /* RW */ | 1701 | unsigned long n_skt:4; /* RW */ |
1375 | unsigned long rsvd_10_63 : 54; | 1702 | unsigned long rsvd_10_63:54; |
1376 | } s; | 1703 | } s; |
1377 | struct uv1h_rh_gam_config_mmr_s { | 1704 | struct uv1h_rh_gam_config_mmr_s { |
1378 | unsigned long m_skt : 6; /* RW */ | 1705 | unsigned long m_skt:6; /* RW */ |
1379 | unsigned long n_skt : 4; /* RW */ | 1706 | unsigned long n_skt:4; /* RW */ |
1380 | unsigned long rsvd_10_11: 2; /* */ | 1707 | unsigned long rsvd_10_11:2; |
1381 | unsigned long mmiol_cfg : 1; /* RW */ | 1708 | unsigned long mmiol_cfg:1; /* RW */ |
1382 | unsigned long rsvd_13_63: 51; /* */ | 1709 | unsigned long rsvd_13_63:51; |
1383 | } s1; | 1710 | } s1; |
1384 | struct uv2h_rh_gam_config_mmr_s { | 1711 | struct uv2h_rh_gam_config_mmr_s { |
1385 | unsigned long m_skt : 6; /* RW */ | 1712 | unsigned long m_skt:6; /* RW */ |
1386 | unsigned long n_skt : 4; /* RW */ | 1713 | unsigned long n_skt:4; /* RW */ |
1387 | unsigned long rsvd_10_63: 54; /* */ | 1714 | unsigned long rsvd_10_63:54; |
1388 | } s2; | 1715 | } s2; |
1389 | }; | 1716 | }; |
1390 | 1717 | ||
1391 | /* ========================================================================= */ | 1718 | /* ========================================================================= */ |
1392 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ | 1719 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ |
1393 | /* ========================================================================= */ | 1720 | /* ========================================================================= */ |
1394 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL | 1721 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL |
1395 | 1722 | ||
1396 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 | 1723 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 |
1397 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL | 1724 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL |
1398 | 1725 | ||
1399 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 | 1726 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 |
1400 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL | 1727 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 |
1401 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 | 1728 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 |
1402 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL | 1729 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 |
1403 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 | 1730 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL |
1404 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL | 1731 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL |
1405 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1732 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL |
1406 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | 1733 | #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1407 | 1734 | ||
1408 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 | 1735 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 |
1409 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL | 1736 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 |
1410 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 | 1737 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 |
1411 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL | 1738 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL |
1412 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1739 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL |
1413 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | 1740 | #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1414 | 1741 | ||
1415 | union uvh_rh_gam_gru_overlay_config_mmr_u { | 1742 | union uvh_rh_gam_gru_overlay_config_mmr_u { |
1416 | unsigned long v; | 1743 | unsigned long v; |
1417 | struct uvh_rh_gam_gru_overlay_config_mmr_s { | 1744 | struct uvh_rh_gam_gru_overlay_config_mmr_s { |
1418 | unsigned long rsvd_0_27: 28; /* */ | 1745 | unsigned long rsvd_0_27:28; |
1419 | unsigned long base : 18; /* RW */ | 1746 | unsigned long base:18; /* RW */ |
1420 | unsigned long rsvd_46_62 : 17; | 1747 | unsigned long rsvd_46_62:17; |
1421 | unsigned long enable : 1; /* RW */ | 1748 | unsigned long enable:1; /* RW */ |
1422 | } s; | 1749 | } s; |
1423 | struct uv1h_rh_gam_gru_overlay_config_mmr_s { | 1750 | struct uv1h_rh_gam_gru_overlay_config_mmr_s { |
1424 | unsigned long rsvd_0_27: 28; /* */ | 1751 | unsigned long rsvd_0_27:28; |
1425 | unsigned long base : 18; /* RW */ | 1752 | unsigned long base:18; /* RW */ |
1426 | unsigned long rsvd_46_47: 2; /* */ | 1753 | unsigned long rsvd_46_47:2; |
1427 | unsigned long gr4 : 1; /* RW */ | 1754 | unsigned long gr4:1; /* RW */ |
1428 | unsigned long rsvd_49_51: 3; /* */ | 1755 | unsigned long rsvd_49_51:3; |
1429 | unsigned long n_gru : 4; /* RW */ | 1756 | unsigned long n_gru:4; /* RW */ |
1430 | unsigned long rsvd_56_62: 7; /* */ | 1757 | unsigned long rsvd_56_62:7; |
1431 | unsigned long enable : 1; /* RW */ | 1758 | unsigned long enable:1; /* RW */ |
1432 | } s1; | 1759 | } s1; |
1433 | struct uv2h_rh_gam_gru_overlay_config_mmr_s { | 1760 | struct uv2h_rh_gam_gru_overlay_config_mmr_s { |
1434 | unsigned long rsvd_0_27: 28; /* */ | 1761 | unsigned long rsvd_0_27:28; |
1435 | unsigned long base : 18; /* RW */ | 1762 | unsigned long base:18; /* RW */ |
1436 | unsigned long rsvd_46_51: 6; /* */ | 1763 | unsigned long rsvd_46_51:6; |
1437 | unsigned long n_gru : 4; /* RW */ | 1764 | unsigned long n_gru:4; /* RW */ |
1438 | unsigned long rsvd_56_62: 7; /* */ | 1765 | unsigned long rsvd_56_62:7; |
1439 | unsigned long enable : 1; /* RW */ | 1766 | unsigned long enable:1; /* RW */ |
1440 | } s2; | 1767 | } s2; |
1441 | }; | 1768 | }; |
1442 | 1769 | ||
1443 | /* ========================================================================= */ | 1770 | /* ========================================================================= */ |
1444 | /* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ | 1771 | /* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ |
1445 | /* ========================================================================= */ | 1772 | /* ========================================================================= */ |
1446 | #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL | 1773 | #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL |
1447 | 1774 | ||
1448 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 | 1775 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 |
1449 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL | 1776 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 |
1450 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 | 1777 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 |
1451 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL | ||
1452 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 | ||
1453 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL | ||
1454 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1778 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 |
1779 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL | ||
1780 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL | ||
1781 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL | ||
1455 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | 1782 | #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1456 | 1783 | ||
1457 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 | 1784 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 |
1458 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL | 1785 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 |
1459 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 | 1786 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 |
1460 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL | ||
1461 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 | ||
1462 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL | ||
1463 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1787 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 |
1788 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL | ||
1789 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL | ||
1790 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL | ||
1464 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | 1791 | #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1465 | 1792 | ||
1466 | union uvh_rh_gam_mmioh_overlay_config_mmr_u { | 1793 | union uvh_rh_gam_mmioh_overlay_config_mmr_u { |
1467 | unsigned long v; | 1794 | unsigned long v; |
1468 | struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { | 1795 | struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { |
1469 | unsigned long rsvd_0_29: 30; /* */ | 1796 | unsigned long rsvd_0_29:30; |
1470 | unsigned long base : 16; /* RW */ | 1797 | unsigned long base:16; /* RW */ |
1471 | unsigned long m_io : 6; /* RW */ | 1798 | unsigned long m_io:6; /* RW */ |
1472 | unsigned long n_io : 4; /* RW */ | 1799 | unsigned long n_io:4; /* RW */ |
1473 | unsigned long rsvd_56_62: 7; /* */ | 1800 | unsigned long rsvd_56_62:7; |
1474 | unsigned long enable : 1; /* RW */ | 1801 | unsigned long enable:1; /* RW */ |
1475 | } s1; | 1802 | } s1; |
1476 | struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { | 1803 | struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { |
1477 | unsigned long rsvd_0_26: 27; /* */ | 1804 | unsigned long rsvd_0_26:27; |
1478 | unsigned long base : 19; /* RW */ | 1805 | unsigned long base:19; /* RW */ |
1479 | unsigned long m_io : 6; /* RW */ | 1806 | unsigned long m_io:6; /* RW */ |
1480 | unsigned long n_io : 4; /* RW */ | 1807 | unsigned long n_io:4; /* RW */ |
1481 | unsigned long rsvd_56_62: 7; /* */ | 1808 | unsigned long rsvd_56_62:7; |
1482 | unsigned long enable : 1; /* RW */ | 1809 | unsigned long enable:1; /* RW */ |
1483 | } s2; | 1810 | } s2; |
1484 | }; | 1811 | }; |
1485 | 1812 | ||
1486 | /* ========================================================================= */ | 1813 | /* ========================================================================= */ |
1487 | /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ | 1814 | /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ |
1488 | /* ========================================================================= */ | 1815 | /* ========================================================================= */ |
1489 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL | 1816 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL |
1490 | 1817 | ||
1491 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | 1818 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 |
1492 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | 1819 | #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL |
1493 | 1820 | ||
1494 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | 1821 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 |
1495 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | ||
1496 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 | 1822 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 |
1823 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | ||
1824 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | ||
1497 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL | 1825 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL |
1498 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1826 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1499 | #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | ||
1500 | 1827 | ||
1501 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | 1828 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 |
1502 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | 1829 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 |
1503 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | 1830 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL |
1504 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | 1831 | #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL |
1505 | 1832 | ||
1506 | union uvh_rh_gam_mmr_overlay_config_mmr_u { | 1833 | union uvh_rh_gam_mmr_overlay_config_mmr_u { |
1507 | unsigned long v; | 1834 | unsigned long v; |
1508 | struct uvh_rh_gam_mmr_overlay_config_mmr_s { | 1835 | struct uvh_rh_gam_mmr_overlay_config_mmr_s { |
1509 | unsigned long rsvd_0_25: 26; /* */ | 1836 | unsigned long rsvd_0_25:26; |
1510 | unsigned long base : 20; /* RW */ | 1837 | unsigned long base:20; /* RW */ |
1511 | unsigned long rsvd_46_62 : 17; | 1838 | unsigned long rsvd_46_62:17; |
1512 | unsigned long enable : 1; /* RW */ | 1839 | unsigned long enable:1; /* RW */ |
1513 | } s; | 1840 | } s; |
1514 | struct uv1h_rh_gam_mmr_overlay_config_mmr_s { | 1841 | struct uv1h_rh_gam_mmr_overlay_config_mmr_s { |
1515 | unsigned long rsvd_0_25: 26; /* */ | 1842 | unsigned long rsvd_0_25:26; |
1516 | unsigned long base : 20; /* RW */ | 1843 | unsigned long base:20; /* RW */ |
1517 | unsigned long dual_hub : 1; /* RW */ | 1844 | unsigned long dual_hub:1; /* RW */ |
1518 | unsigned long rsvd_47_62: 16; /* */ | 1845 | unsigned long rsvd_47_62:16; |
1519 | unsigned long enable : 1; /* RW */ | 1846 | unsigned long enable:1; /* RW */ |
1520 | } s1; | 1847 | } s1; |
1521 | struct uv2h_rh_gam_mmr_overlay_config_mmr_s { | 1848 | struct uv2h_rh_gam_mmr_overlay_config_mmr_s { |
1522 | unsigned long rsvd_0_25: 26; /* */ | 1849 | unsigned long rsvd_0_25:26; |
1523 | unsigned long base : 20; /* RW */ | 1850 | unsigned long base:20; /* RW */ |
1524 | unsigned long rsvd_46_62: 17; /* */ | 1851 | unsigned long rsvd_46_62:17; |
1525 | unsigned long enable : 1; /* RW */ | 1852 | unsigned long enable:1; /* RW */ |
1526 | } s2; | 1853 | } s2; |
1527 | }; | 1854 | }; |
1528 | 1855 | ||
1529 | /* ========================================================================= */ | 1856 | /* ========================================================================= */ |
1530 | /* UVH_RTC */ | 1857 | /* UVH_RTC */ |
1531 | /* ========================================================================= */ | 1858 | /* ========================================================================= */ |
1532 | #define UVH_RTC 0x340000UL | 1859 | #define UVH_RTC 0x340000UL |
1533 | 1860 | ||
1534 | #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 | 1861 | #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 |
1535 | #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL | 1862 | #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL |
1536 | 1863 | ||
1537 | union uvh_rtc_u { | 1864 | union uvh_rtc_u { |
1538 | unsigned long v; | 1865 | unsigned long v; |
1539 | struct uvh_rtc_s { | 1866 | struct uvh_rtc_s { |
1540 | unsigned long real_time_clock : 56; /* RW */ | 1867 | unsigned long real_time_clock:56; /* RW */ |
1541 | unsigned long rsvd_56_63 : 8; /* */ | 1868 | unsigned long rsvd_56_63:8; |
1542 | } s; | 1869 | } s; |
1543 | }; | 1870 | }; |
1544 | 1871 | ||
1545 | /* ========================================================================= */ | 1872 | /* ========================================================================= */ |
1546 | /* UVH_RTC1_INT_CONFIG */ | 1873 | /* UVH_RTC1_INT_CONFIG */ |
1547 | /* ========================================================================= */ | 1874 | /* ========================================================================= */ |
1548 | #define UVH_RTC1_INT_CONFIG 0x615c0UL | 1875 | #define UVH_RTC1_INT_CONFIG 0x615c0UL |
1549 | 1876 | ||
1550 | #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 | 1877 | #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 |
1551 | #define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | 1878 | #define UVH_RTC1_INT_CONFIG_DM_SHFT 8 |
1552 | #define UVH_RTC1_INT_CONFIG_DM_SHFT 8 | 1879 | #define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 |
1553 | #define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL | 1880 | #define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 |
1554 | #define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 | 1881 | #define UVH_RTC1_INT_CONFIG_P_SHFT 13 |
1555 | #define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | 1882 | #define UVH_RTC1_INT_CONFIG_T_SHFT 15 |
1556 | #define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 | 1883 | #define UVH_RTC1_INT_CONFIG_M_SHFT 16 |
1557 | #define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | 1884 | #define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 |
1558 | #define UVH_RTC1_INT_CONFIG_P_SHFT 13 | 1885 | #define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL |
1559 | #define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL | 1886 | #define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL |
1560 | #define UVH_RTC1_INT_CONFIG_T_SHFT 15 | 1887 | #define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL |
1561 | #define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL | 1888 | #define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL |
1562 | #define UVH_RTC1_INT_CONFIG_M_SHFT 16 | 1889 | #define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL |
1563 | #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL | 1890 | #define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL |
1564 | #define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 | 1891 | #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL |
1565 | #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | 1892 | #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL |
1566 | 1893 | ||
1567 | union uvh_rtc1_int_config_u { | 1894 | union uvh_rtc1_int_config_u { |
1568 | unsigned long v; | 1895 | unsigned long v; |
1569 | struct uvh_rtc1_int_config_s { | 1896 | struct uvh_rtc1_int_config_s { |
1570 | unsigned long vector_ : 8; /* RW */ | 1897 | unsigned long vector_:8; /* RW */ |
1571 | unsigned long dm : 3; /* RW */ | 1898 | unsigned long dm:3; /* RW */ |
1572 | unsigned long destmode : 1; /* RW */ | 1899 | unsigned long destmode:1; /* RW */ |
1573 | unsigned long status : 1; /* RO */ | 1900 | unsigned long status:1; /* RO */ |
1574 | unsigned long p : 1; /* RO */ | 1901 | unsigned long p:1; /* RO */ |
1575 | unsigned long rsvd_14 : 1; /* */ | 1902 | unsigned long rsvd_14:1; |
1576 | unsigned long t : 1; /* RO */ | 1903 | unsigned long t:1; /* RO */ |
1577 | unsigned long m : 1; /* RW */ | 1904 | unsigned long m:1; /* RW */ |
1578 | unsigned long rsvd_17_31: 15; /* */ | 1905 | unsigned long rsvd_17_31:15; |
1579 | unsigned long apic_id : 32; /* RW */ | 1906 | unsigned long apic_id:32; /* RW */ |
1580 | } s; | 1907 | } s; |
1581 | }; | 1908 | }; |
1582 | 1909 | ||
1583 | /* ========================================================================= */ | 1910 | /* ========================================================================= */ |
1584 | /* UVH_SCRATCH5 */ | 1911 | /* UVH_SCRATCH5 */ |
1585 | /* ========================================================================= */ | 1912 | /* ========================================================================= */ |
1586 | #define UVH_SCRATCH5 0x2d0200UL | 1913 | #define UVH_SCRATCH5 0x2d0200UL |
1587 | #define UVH_SCRATCH5_32 0x778 | 1914 | #define UVH_SCRATCH5_32 0x778 |
1588 | 1915 | ||
1589 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 | 1916 | #define UVH_SCRATCH5_SCRATCH5_SHFT 0 |
1590 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL | 1917 | #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL |
1591 | 1918 | ||
1592 | union uvh_scratch5_u { | 1919 | union uvh_scratch5_u { |
1593 | unsigned long v; | 1920 | unsigned long v; |
1594 | struct uvh_scratch5_s { | 1921 | struct uvh_scratch5_s { |
1595 | unsigned long scratch5 : 64; /* RW, W1CS */ | 1922 | unsigned long scratch5:64; /* RW, W1CS */ |
1596 | } s; | 1923 | } s; |
1597 | }; | 1924 | }; |
1598 | 1925 | ||
1599 | /* ========================================================================= */ | 1926 | /* ========================================================================= */ |
1600 | /* UV2H_EVENT_OCCURRED2 */ | 1927 | /* UV2H_EVENT_OCCURRED2 */ |
1601 | /* ========================================================================= */ | 1928 | /* ========================================================================= */ |
1602 | #define UV2H_EVENT_OCCURRED2 0x70100UL | 1929 | #define UV2H_EVENT_OCCURRED2 0x70100UL |
1603 | #define UV2H_EVENT_OCCURRED2_32 0xb68 | 1930 | #define UV2H_EVENT_OCCURRED2_32 0xb68 |
1604 | 1931 | ||
1605 | #define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 | 1932 | #define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 |
1606 | #define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL | 1933 | #define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 |
1607 | #define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 | 1934 | #define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 |
1608 | #define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL | 1935 | #define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 |
1609 | #define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 | 1936 | #define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 |
1610 | #define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL | 1937 | #define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 |
1611 | #define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 | 1938 | #define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 |
1612 | #define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL | 1939 | #define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 |
1613 | #define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 | 1940 | #define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 |
1614 | #define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL | 1941 | #define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 |
1615 | #define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 | 1942 | #define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 |
1616 | #define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL | 1943 | #define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 |
1617 | #define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 | 1944 | #define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 |
1618 | #define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL | 1945 | #define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 |
1619 | #define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 | 1946 | #define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 |
1620 | #define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL | 1947 | #define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 |
1621 | #define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 | 1948 | #define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 |
1622 | #define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL | 1949 | #define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 |
1623 | #define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 | 1950 | #define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 |
1624 | #define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL | 1951 | #define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 |
1625 | #define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 | 1952 | #define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 |
1626 | #define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL | 1953 | #define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 |
1627 | #define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 | 1954 | #define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 |
1628 | #define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL | 1955 | #define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 |
1629 | #define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 | 1956 | #define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 |
1630 | #define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL | 1957 | #define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 |
1631 | #define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 | 1958 | #define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 |
1632 | #define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL | 1959 | #define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 |
1633 | #define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 | 1960 | #define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 |
1634 | #define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL | 1961 | #define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 |
1635 | #define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 | 1962 | #define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 |
1636 | #define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL | 1963 | #define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 |
1637 | #define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 | 1964 | #define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL |
1638 | #define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL | 1965 | #define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL |
1639 | #define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 | 1966 | #define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL |
1640 | #define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL | 1967 | #define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL |
1641 | #define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 | 1968 | #define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL |
1642 | #define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL | 1969 | #define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL |
1643 | #define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 | 1970 | #define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL |
1644 | #define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL | 1971 | #define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL |
1645 | #define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 | 1972 | #define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL |
1646 | #define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL | 1973 | #define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL |
1647 | #define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 | 1974 | #define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL |
1648 | #define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL | 1975 | #define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL |
1649 | #define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 | 1976 | #define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL |
1650 | #define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL | 1977 | #define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL |
1651 | #define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 | 1978 | #define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL |
1652 | #define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL | 1979 | #define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL |
1653 | #define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 | 1980 | #define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL |
1654 | #define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL | 1981 | #define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL |
1655 | #define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 | 1982 | #define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL |
1656 | #define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL | 1983 | #define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL |
1657 | #define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 | 1984 | #define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL |
1658 | #define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL | 1985 | #define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL |
1659 | #define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 | 1986 | #define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL |
1660 | #define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL | 1987 | #define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL |
1661 | #define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 | 1988 | #define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL |
1662 | #define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL | 1989 | #define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL |
1663 | #define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 | 1990 | #define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL |
1664 | #define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL | 1991 | #define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL |
1665 | #define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 | 1992 | #define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL |
1666 | #define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL | 1993 | #define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL |
1667 | #define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 | 1994 | #define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL |
1668 | #define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL | 1995 | #define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL |
1669 | 1996 | ||
1670 | union uv2h_event_occurred2_u { | 1997 | union uv2h_event_occurred2_u { |
1671 | unsigned long v; | 1998 | unsigned long v; |
1672 | struct uv2h_event_occurred2_s { | 1999 | struct uv2h_event_occurred2_s { |
1673 | unsigned long rtc_0 : 1; /* RW */ | 2000 | unsigned long rtc_0:1; /* RW */ |
1674 | unsigned long rtc_1 : 1; /* RW */ | 2001 | unsigned long rtc_1:1; /* RW */ |
1675 | unsigned long rtc_2 : 1; /* RW */ | 2002 | unsigned long rtc_2:1; /* RW */ |
1676 | unsigned long rtc_3 : 1; /* RW */ | 2003 | unsigned long rtc_3:1; /* RW */ |
1677 | unsigned long rtc_4 : 1; /* RW */ | 2004 | unsigned long rtc_4:1; /* RW */ |
1678 | unsigned long rtc_5 : 1; /* RW */ | 2005 | unsigned long rtc_5:1; /* RW */ |
1679 | unsigned long rtc_6 : 1; /* RW */ | 2006 | unsigned long rtc_6:1; /* RW */ |
1680 | unsigned long rtc_7 : 1; /* RW */ | 2007 | unsigned long rtc_7:1; /* RW */ |
1681 | unsigned long rtc_8 : 1; /* RW */ | 2008 | unsigned long rtc_8:1; /* RW */ |
1682 | unsigned long rtc_9 : 1; /* RW */ | 2009 | unsigned long rtc_9:1; /* RW */ |
1683 | unsigned long rtc_10 : 1; /* RW */ | 2010 | unsigned long rtc_10:1; /* RW */ |
1684 | unsigned long rtc_11 : 1; /* RW */ | 2011 | unsigned long rtc_11:1; /* RW */ |
1685 | unsigned long rtc_12 : 1; /* RW */ | 2012 | unsigned long rtc_12:1; /* RW */ |
1686 | unsigned long rtc_13 : 1; /* RW */ | 2013 | unsigned long rtc_13:1; /* RW */ |
1687 | unsigned long rtc_14 : 1; /* RW */ | 2014 | unsigned long rtc_14:1; /* RW */ |
1688 | unsigned long rtc_15 : 1; /* RW */ | 2015 | unsigned long rtc_15:1; /* RW */ |
1689 | unsigned long rtc_16 : 1; /* RW */ | 2016 | unsigned long rtc_16:1; /* RW */ |
1690 | unsigned long rtc_17 : 1; /* RW */ | 2017 | unsigned long rtc_17:1; /* RW */ |
1691 | unsigned long rtc_18 : 1; /* RW */ | 2018 | unsigned long rtc_18:1; /* RW */ |
1692 | unsigned long rtc_19 : 1; /* RW */ | 2019 | unsigned long rtc_19:1; /* RW */ |
1693 | unsigned long rtc_20 : 1; /* RW */ | 2020 | unsigned long rtc_20:1; /* RW */ |
1694 | unsigned long rtc_21 : 1; /* RW */ | 2021 | unsigned long rtc_21:1; /* RW */ |
1695 | unsigned long rtc_22 : 1; /* RW */ | 2022 | unsigned long rtc_22:1; /* RW */ |
1696 | unsigned long rtc_23 : 1; /* RW */ | 2023 | unsigned long rtc_23:1; /* RW */ |
1697 | unsigned long rtc_24 : 1; /* RW */ | 2024 | unsigned long rtc_24:1; /* RW */ |
1698 | unsigned long rtc_25 : 1; /* RW */ | 2025 | unsigned long rtc_25:1; /* RW */ |
1699 | unsigned long rtc_26 : 1; /* RW */ | 2026 | unsigned long rtc_26:1; /* RW */ |
1700 | unsigned long rtc_27 : 1; /* RW */ | 2027 | unsigned long rtc_27:1; /* RW */ |
1701 | unsigned long rtc_28 : 1; /* RW */ | 2028 | unsigned long rtc_28:1; /* RW */ |
1702 | unsigned long rtc_29 : 1; /* RW */ | 2029 | unsigned long rtc_29:1; /* RW */ |
1703 | unsigned long rtc_30 : 1; /* RW */ | 2030 | unsigned long rtc_30:1; /* RW */ |
1704 | unsigned long rtc_31 : 1; /* RW */ | 2031 | unsigned long rtc_31:1; /* RW */ |
1705 | unsigned long rsvd_32_63: 32; /* */ | 2032 | unsigned long rsvd_32_63:32; |
1706 | } s1; | 2033 | } s1; |
1707 | }; | 2034 | }; |
1708 | 2035 | ||
1709 | /* ========================================================================= */ | 2036 | /* ========================================================================= */ |
1710 | /* UV2H_EVENT_OCCURRED2_ALIAS */ | 2037 | /* UV2H_EVENT_OCCURRED2_ALIAS */ |
1711 | /* ========================================================================= */ | 2038 | /* ========================================================================= */ |
1712 | #define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL | 2039 | #define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL |
1713 | #define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 | 2040 | #define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 |
1714 | 2041 | ||
1715 | /* ========================================================================= */ | 2042 | /* ========================================================================= */ |
1716 | /* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ | 2043 | /* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ |
1717 | /* ========================================================================= */ | 2044 | /* ========================================================================= */ |
1718 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL | 2045 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL |
1719 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 | 2046 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 |
1720 | 2047 | ||
1721 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 | 2048 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 |
1722 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL | 2049 | #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL |
1723 | 2050 | ||
1724 | union uv2h_lb_bau_sb_activation_status_2_u { | 2051 | union uv2h_lb_bau_sb_activation_status_2_u { |
1725 | unsigned long v; | 2052 | unsigned long v; |
1726 | struct uv2h_lb_bau_sb_activation_status_2_s { | 2053 | struct uv2h_lb_bau_sb_activation_status_2_s { |
1727 | unsigned long aux_error : 64; /* RW */ | 2054 | unsigned long aux_error:64; /* RW */ |
1728 | } s1; | 2055 | } s1; |
1729 | }; | 2056 | }; |
1730 | 2057 | ||
1731 | /* ========================================================================= */ | 2058 | /* ========================================================================= */ |
1732 | /* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ | 2059 | /* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ |
1733 | /* ========================================================================= */ | 2060 | /* ========================================================================= */ |
1734 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL | 2061 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL |
1735 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 | 2062 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 |
1736 | 2063 | ||
1737 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 | 2064 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 |
1738 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL | 2065 | #define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL |
1739 | 2066 | ||
1740 | union uv1h_lb_target_physical_apic_id_mask_u { | 2067 | union uv1h_lb_target_physical_apic_id_mask_u { |
1741 | unsigned long v; | 2068 | unsigned long v; |
1742 | struct uv1h_lb_target_physical_apic_id_mask_s { | 2069 | struct uv1h_lb_target_physical_apic_id_mask_s { |
1743 | unsigned long bit_enables : 32; /* RW */ | 2070 | unsigned long bit_enables:32; /* RW */ |
1744 | unsigned long rsvd_32_63 : 32; /* */ | 2071 | unsigned long rsvd_32_63:32; |
1745 | } s1; | 2072 | } s1; |
1746 | }; | 2073 | }; |
1747 | 2074 | ||
1748 | 2075 | ||
1749 | #endif /* __ASM_UV_MMRS_X86_H__ */ | 2076 | #endif /* _ASM_X86_UV_UV_MMRS_H */ |
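The uv_mmrs.h hunk above only reorders the SHFT/MASK defines and tightens the bitfield formatting; no register offsets or field widths change. For context, a minimal sketch of how one of these MMR unions is typically consumed. uv_read_local_mmr() is the real accessor from <asm/uv/uv_hub.h>; the helper function itself is illustrative and not part of the patch.

	#include <asm/uv/uv_hub.h>
	#include <asm/uv/uv_mmrs.h>

	static unsigned long example_gru_overlay_base(void)
	{
		union uvh_rh_gam_gru_overlay_config_mmr_u gru;

		/* read the raw 64-bit MMR, then pick fields apart via the bitfield view */
		gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
		if (!gru.s.enable)
			return 0;

		/* gru.s.base holds bits 45:28; shift back up to a physical address */
		return (unsigned long)gru.s.base <<
			UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
	}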
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 646b4c1ca695..815285bcaceb 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -11,10 +11,9 @@ struct vsyscall_gtod_data { | |||
11 | time_t wall_time_sec; | 11 | time_t wall_time_sec; |
12 | u32 wall_time_nsec; | 12 | u32 wall_time_nsec; |
13 | 13 | ||
14 | int sysctl_enabled; | ||
15 | struct timezone sys_tz; | 14 | struct timezone sys_tz; |
16 | struct { /* extract of a clocksource struct */ | 15 | struct { /* extract of a clocksource struct */ |
17 | cycle_t (*vread)(void); | 16 | int vclock_mode; |
18 | cycle_t cycle_last; | 17 | cycle_t cycle_last; |
19 | cycle_t mask; | 18 | cycle_t mask; |
20 | u32 mult; | 19 | u32 mult; |
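The vgtod.h hunk replaces the vread() function pointer in the clocksource extract with an integer vclock_mode, so the vDSO can pick its read path without an indirect call into kernel text. A rough sketch of the selection this enables; VCLOCK_NONE/VCLOCK_TSC/VCLOCK_HPET and the per-clock readers are names introduced elsewhere in this series and are assumed here, not shown in this hunk.

	/* sketch only: select the cycle reader from vclock_mode */
	static notrace cycle_t vgetcyc(int vclock_mode)
	{
		if (vclock_mode == VCLOCK_TSC)
			return vread_tsc();
		if (vclock_mode == VCLOCK_HPET)
			return vread_hpet();
		return 0;	/* VCLOCK_NONE: caller falls back to the real syscall */
	}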
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 84471b810460..2caf290e9895 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -132,6 +132,8 @@ enum vmcs_field { | |||
132 | GUEST_IA32_PAT_HIGH = 0x00002805, | 132 | GUEST_IA32_PAT_HIGH = 0x00002805, |
133 | GUEST_IA32_EFER = 0x00002806, | 133 | GUEST_IA32_EFER = 0x00002806, |
134 | GUEST_IA32_EFER_HIGH = 0x00002807, | 134 | GUEST_IA32_EFER_HIGH = 0x00002807, |
135 | GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, | ||
136 | GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, | ||
135 | GUEST_PDPTR0 = 0x0000280a, | 137 | GUEST_PDPTR0 = 0x0000280a, |
136 | GUEST_PDPTR0_HIGH = 0x0000280b, | 138 | GUEST_PDPTR0_HIGH = 0x0000280b, |
137 | GUEST_PDPTR1 = 0x0000280c, | 139 | GUEST_PDPTR1 = 0x0000280c, |
@@ -144,6 +146,8 @@ enum vmcs_field { | |||
144 | HOST_IA32_PAT_HIGH = 0x00002c01, | 146 | HOST_IA32_PAT_HIGH = 0x00002c01, |
145 | HOST_IA32_EFER = 0x00002c02, | 147 | HOST_IA32_EFER = 0x00002c02, |
146 | HOST_IA32_EFER_HIGH = 0x00002c03, | 148 | HOST_IA32_EFER_HIGH = 0x00002c03, |
149 | HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, | ||
150 | HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, | ||
147 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 151 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
148 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 152 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
149 | EXCEPTION_BITMAP = 0x00004004, | 153 | EXCEPTION_BITMAP = 0x00004004, |
@@ -426,4 +430,43 @@ struct vmx_msr_entry { | |||
426 | u64 value; | 430 | u64 value; |
427 | } __aligned(16); | 431 | } __aligned(16); |
428 | 432 | ||
433 | /* | ||
434 | * Exit Qualifications for entry failure during or after loading guest state | ||
435 | */ | ||
436 | #define ENTRY_FAIL_DEFAULT 0 | ||
437 | #define ENTRY_FAIL_PDPTE 2 | ||
438 | #define ENTRY_FAIL_NMI 3 | ||
439 | #define ENTRY_FAIL_VMCS_LINK_PTR 4 | ||
440 | |||
441 | /* | ||
442 | * VM-instruction error numbers | ||
443 | */ | ||
444 | enum vm_instruction_error_number { | ||
445 | VMXERR_VMCALL_IN_VMX_ROOT_OPERATION = 1, | ||
446 | VMXERR_VMCLEAR_INVALID_ADDRESS = 2, | ||
447 | VMXERR_VMCLEAR_VMXON_POINTER = 3, | ||
448 | VMXERR_VMLAUNCH_NONCLEAR_VMCS = 4, | ||
449 | VMXERR_VMRESUME_NONLAUNCHED_VMCS = 5, | ||
450 | VMXERR_VMRESUME_AFTER_VMXOFF = 6, | ||
451 | VMXERR_ENTRY_INVALID_CONTROL_FIELD = 7, | ||
452 | VMXERR_ENTRY_INVALID_HOST_STATE_FIELD = 8, | ||
453 | VMXERR_VMPTRLD_INVALID_ADDRESS = 9, | ||
454 | VMXERR_VMPTRLD_VMXON_POINTER = 10, | ||
455 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID = 11, | ||
456 | VMXERR_UNSUPPORTED_VMCS_COMPONENT = 12, | ||
457 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT = 13, | ||
458 | VMXERR_VMXON_IN_VMX_ROOT_OPERATION = 15, | ||
459 | VMXERR_ENTRY_INVALID_EXECUTIVE_VMCS_POINTER = 16, | ||
460 | VMXERR_ENTRY_NONLAUNCHED_EXECUTIVE_VMCS = 17, | ||
461 | VMXERR_ENTRY_EXECUTIVE_VMCS_POINTER_NOT_VMXON_POINTER = 18, | ||
462 | VMXERR_VMCALL_NONCLEAR_VMCS = 19, | ||
463 | VMXERR_VMCALL_INVALID_VM_EXIT_CONTROL_FIELDS = 20, | ||
464 | VMXERR_VMCALL_INCORRECT_MSEG_REVISION_ID = 22, | ||
465 | VMXERR_VMXOFF_UNDER_DUAL_MONITOR_TREATMENT_OF_SMIS_AND_SMM = 23, | ||
466 | VMXERR_VMCALL_INVALID_SMM_MONITOR_FEATURES = 24, | ||
467 | VMXERR_ENTRY_INVALID_VM_EXECUTION_CONTROL_FIELDS_IN_EXECUTIVE_VMCS = 25, | ||
468 | VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS = 26, | ||
469 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID = 28, | ||
470 | }; | ||
471 | |||
429 | #endif | 472 | #endif |
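The new vm_instruction_error_number enum added to vmx.h mirrors the VM-instruction error numbers from the Intel SDM, which nested-VMX code can report back to an L1 guest. A hedged example of turning a few of these codes into diagnostic strings; the helper and message text are not part of the patch, only the enum values are.

	static const char *vmx_instruction_error_name(u32 err)
	{
		switch (err) {
		case VMXERR_VMCLEAR_INVALID_ADDRESS:
			return "VMCLEAR with invalid physical address";
		case VMXERR_VMLAUNCH_NONCLEAR_VMCS:
			return "VMLAUNCH with non-clear VMCS";
		case VMXERR_ENTRY_INVALID_CONTROL_FIELD:
			return "VM entry with invalid control field(s)";
		case VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID:
			return "VMPTRLD with incorrect VMCS revision id";
		default:
			return "unknown VM-instruction error";
		}
	}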
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d55597351f6a..60107072c28b 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h | |||
@@ -16,10 +16,6 @@ enum vsyscall_num { | |||
16 | #ifdef __KERNEL__ | 16 | #ifdef __KERNEL__ |
17 | #include <linux/seqlock.h> | 17 | #include <linux/seqlock.h> |
18 | 18 | ||
19 | /* Definitions for CONFIG_GENERIC_TIME definitions */ | ||
20 | #define __vsyscall_fn \ | ||
21 | __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace | ||
22 | |||
23 | #define VGETCPU_RDTSCP 1 | 19 | #define VGETCPU_RDTSCP 1 |
24 | #define VGETCPU_LSL 2 | 20 | #define VGETCPU_LSL 2 |
25 | 21 | ||
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 341b3559452b..de656ac2af41 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h | |||
@@ -10,15 +10,14 @@ | |||
10 | * In normal kernel code, they are used like any other variable. | 10 | * In normal kernel code, they are used like any other variable. |
11 | * In user code, they are accessed through the VVAR macro. | 11 | * In user code, they are accessed through the VVAR macro. |
12 | * | 12 | * |
13 | * Each of these variables lives in the vsyscall page, and each | 13 | * These variables live in a page of kernel data that has an extra RO |
14 | * one needs a unique offset within the little piece of the page | 14 | * mapping for userspace. Each variable needs a unique offset within |
15 | * reserved for vvars. Specify that offset in DECLARE_VVAR. | 15 | * that page; specify that offset with the DECLARE_VVAR macro. (If |
16 | * (There are 896 bytes available. If you mess up, the linker will | 16 | * you mess up, the linker will catch it.) |
17 | * catch it.) | ||
18 | */ | 17 | */ |
19 | 18 | ||
20 | /* Offset of vars within vsyscall page */ | 19 | /* Base address of vvars. This is not ABI. */ |
21 | #define VSYSCALL_VARS_OFFSET (3072 + 128) | 20 | #define VVAR_ADDRESS (-10*1024*1024 - 4096) |
22 | 21 | ||
23 | #if defined(__VVAR_KERNEL_LDS) | 22 | #if defined(__VVAR_KERNEL_LDS) |
24 | 23 | ||
@@ -26,17 +25,17 @@ | |||
26 | * right place. | 25 | * right place. |
27 | */ | 26 | */ |
28 | #define DECLARE_VVAR(offset, type, name) \ | 27 | #define DECLARE_VVAR(offset, type, name) \ |
29 | EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset) | 28 | EMIT_VVAR(name, offset) |
30 | 29 | ||
31 | #else | 30 | #else |
32 | 31 | ||
33 | #define DECLARE_VVAR(offset, type, name) \ | 32 | #define DECLARE_VVAR(offset, type, name) \ |
34 | static type const * const vvaraddr_ ## name = \ | 33 | static type const * const vvaraddr_ ## name = \ |
35 | (void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset)); | 34 | (void *)(VVAR_ADDRESS + (offset)); |
36 | 35 | ||
37 | #define DEFINE_VVAR(type, name) \ | 36 | #define DEFINE_VVAR(type, name) \ |
38 | type __vvar_ ## name \ | 37 | type name \ |
39 | __attribute__((section(".vsyscall_var_" #name), aligned(16))) | 38 | __attribute__((section(".vvar_" #name), aligned(16))) |
40 | 39 | ||
41 | #define VVAR(name) (*vvaraddr_ ## name) | 40 | #define VVAR(name) (*vvaraddr_ ## name) |
42 | 41 | ||
@@ -45,8 +44,7 @@ | |||
45 | /* DECLARE_VVAR(offset, type, name) */ | 44 | /* DECLARE_VVAR(offset, type, name) */ |
46 | 45 | ||
47 | DECLARE_VVAR(0, volatile unsigned long, jiffies) | 46 | DECLARE_VVAR(0, volatile unsigned long, jiffies) |
48 | DECLARE_VVAR(8, int, vgetcpu_mode) | 47 | DECLARE_VVAR(16, int, vgetcpu_mode) |
49 | DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) | 48 | DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) |
50 | 49 | ||
51 | #undef DECLARE_VVAR | 50 | #undef DECLARE_VVAR |
52 | #undef VSYSCALL_VARS_OFFSET | ||
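The vvar.h hunk above moves the vvars out of the vsyscall page and into ordinary kernel data that gets an extra read-only userspace mapping at the fixed, non-ABI address VVAR_ADDRESS. A minimal sketch of what the non-__VVAR_KERNEL_LDS expansion of DECLARE_VVAR(16, int, vgetcpu_mode) looks like, using only values visible in the diff; the vread_vgetcpu_mode() helper at the end is illustrative, not part of the change:

    /* sketch: expansion of DECLARE_VVAR(16, int, vgetcpu_mode) */
    #define VVAR_ADDRESS (-10*1024*1024 - 4096)

    static int const * const vvaraddr_vgetcpu_mode =
            (void *)(VVAR_ADDRESS + 16);

    #define VVAR(name) (*vvaraddr_ ## name)

    /* a vDSO-side reader simply dereferences the RO user mapping */
    static inline int vread_vgetcpu_mode(void)
    {
            return VVAR(vgetcpu_mode);
    }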
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index d240ea950519..417777de5a40 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -39,6 +39,8 @@ | |||
39 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
41 | 41 | ||
42 | #include <trace/events/xen.h> | ||
43 | |||
42 | #include <asm/page.h> | 44 | #include <asm/page.h> |
43 | #include <asm/pgtable.h> | 45 | #include <asm/pgtable.h> |
44 | 46 | ||
@@ -459,6 +461,8 @@ MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) | |||
459 | { | 461 | { |
460 | mcl->op = __HYPERVISOR_fpu_taskswitch; | 462 | mcl->op = __HYPERVISOR_fpu_taskswitch; |
461 | mcl->args[0] = set; | 463 | mcl->args[0] = set; |
464 | |||
465 | trace_xen_mc_entry(mcl, 1); | ||
462 | } | 466 | } |
463 | 467 | ||
464 | static inline void | 468 | static inline void |
@@ -475,6 +479,8 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, | |||
475 | mcl->args[2] = new_val.pte >> 32; | 479 | mcl->args[2] = new_val.pte >> 32; |
476 | mcl->args[3] = flags; | 480 | mcl->args[3] = flags; |
477 | } | 481 | } |
482 | |||
483 | trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 3 : 4); | ||
478 | } | 484 | } |
479 | 485 | ||
480 | static inline void | 486 | static inline void |
@@ -485,6 +491,8 @@ MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, | |||
485 | mcl->args[0] = cmd; | 491 | mcl->args[0] = cmd; |
486 | mcl->args[1] = (unsigned long)uop; | 492 | mcl->args[1] = (unsigned long)uop; |
487 | mcl->args[2] = count; | 493 | mcl->args[2] = count; |
494 | |||
495 | trace_xen_mc_entry(mcl, 3); | ||
488 | } | 496 | } |
489 | 497 | ||
490 | static inline void | 498 | static inline void |
@@ -504,6 +512,8 @@ MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long v | |||
504 | mcl->args[3] = flags; | 512 | mcl->args[3] = flags; |
505 | mcl->args[4] = domid; | 513 | mcl->args[4] = domid; |
506 | } | 514 | } |
515 | |||
516 | trace_xen_mc_entry(mcl, sizeof(new_val) == sizeof(long) ? 4 : 5); | ||
507 | } | 517 | } |
508 | 518 | ||
509 | static inline void | 519 | static inline void |
@@ -520,6 +530,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, | |||
520 | mcl->args[2] = desc.a; | 530 | mcl->args[2] = desc.a; |
521 | mcl->args[3] = desc.b; | 531 | mcl->args[3] = desc.b; |
522 | } | 532 | } |
533 | |||
534 | trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); | ||
523 | } | 535 | } |
524 | 536 | ||
525 | static inline void | 537 | static inline void |
@@ -528,6 +540,8 @@ MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) | |||
528 | mcl->op = __HYPERVISOR_memory_op; | 540 | mcl->op = __HYPERVISOR_memory_op; |
529 | mcl->args[0] = cmd; | 541 | mcl->args[0] = cmd; |
530 | mcl->args[1] = (unsigned long)arg; | 542 | mcl->args[1] = (unsigned long)arg; |
543 | |||
544 | trace_xen_mc_entry(mcl, 2); | ||
531 | } | 545 | } |
532 | 546 | ||
533 | static inline void | 547 | static inline void |
@@ -539,6 +553,8 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, | |||
539 | mcl->args[1] = count; | 553 | mcl->args[1] = count; |
540 | mcl->args[2] = (unsigned long)success_count; | 554 | mcl->args[2] = (unsigned long)success_count; |
541 | mcl->args[3] = domid; | 555 | mcl->args[3] = domid; |
556 | |||
557 | trace_xen_mc_entry(mcl, 4); | ||
542 | } | 558 | } |
543 | 559 | ||
544 | static inline void | 560 | static inline void |
@@ -550,6 +566,8 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, | |||
550 | mcl->args[1] = count; | 566 | mcl->args[1] = count; |
551 | mcl->args[2] = (unsigned long)success_count; | 567 | mcl->args[2] = (unsigned long)success_count; |
552 | mcl->args[3] = domid; | 568 | mcl->args[3] = domid; |
569 | |||
570 | trace_xen_mc_entry(mcl, 4); | ||
553 | } | 571 | } |
554 | 572 | ||
555 | static inline void | 573 | static inline void |
@@ -558,6 +576,8 @@ MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) | |||
558 | mcl->op = __HYPERVISOR_set_gdt; | 576 | mcl->op = __HYPERVISOR_set_gdt; |
559 | mcl->args[0] = (unsigned long)frames; | 577 | mcl->args[0] = (unsigned long)frames; |
560 | mcl->args[1] = entries; | 578 | mcl->args[1] = entries; |
579 | |||
580 | trace_xen_mc_entry(mcl, 2); | ||
561 | } | 581 | } |
562 | 582 | ||
563 | static inline void | 583 | static inline void |
@@ -567,6 +587,8 @@ MULTI_stack_switch(struct multicall_entry *mcl, | |||
567 | mcl->op = __HYPERVISOR_stack_switch; | 587 | mcl->op = __HYPERVISOR_stack_switch; |
568 | mcl->args[0] = ss; | 588 | mcl->args[0] = ss; |
569 | mcl->args[1] = esp; | 589 | mcl->args[1] = esp; |
590 | |||
591 | trace_xen_mc_entry(mcl, 2); | ||
570 | } | 592 | } |
571 | 593 | ||
572 | #endif /* _ASM_X86_XEN_HYPERCALL_H */ | 594 | #endif /* _ASM_X86_XEN_HYPERCALL_H */ |
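Every MULTI_* constructor above now ends by recording the prepared multicall through the xen_mc_entry tracepoint, passing the number of hypercall arguments that were actually filled in (hence the sizeof() ternaries for the 32-bit PAE layouts, which need an extra argument slot). The pattern, restated from the first hunk with a comment:

    static inline void
    MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
    {
            mcl->op = __HYPERVISOR_fpu_taskswitch;
            mcl->args[0] = set;

            /* one argument populated, so the trace count is 1 */
            trace_xen_mc_entry(mcl, 1);
    }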
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 4fbda9a3f339..968d57dd54c9 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h | |||
@@ -14,13 +14,14 @@ static inline int pci_xen_hvm_init(void) | |||
14 | } | 14 | } |
15 | #endif | 15 | #endif |
16 | #if defined(CONFIG_XEN_DOM0) | 16 | #if defined(CONFIG_XEN_DOM0) |
17 | void __init xen_setup_pirqs(void); | 17 | int __init pci_xen_initial_domain(void); |
18 | int xen_find_device_domain_owner(struct pci_dev *dev); | 18 | int xen_find_device_domain_owner(struct pci_dev *dev); |
19 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); | 19 | int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); |
20 | int xen_unregister_device_domain_owner(struct pci_dev *dev); | 20 | int xen_unregister_device_domain_owner(struct pci_dev *dev); |
21 | #else | 21 | #else |
22 | static inline void __init xen_setup_pirqs(void) | 22 | static inline int __init pci_xen_initial_domain(void) |
23 | { | 23 | { |
24 | return -1; | ||
24 | } | 25 | } |
25 | static inline int xen_find_device_domain_owner(struct pci_dev *dev) | 26 | static inline int xen_find_device_domain_owner(struct pci_dev *dev) |
26 | { | 27 | { |
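With the prototype change above, the non-CONFIG_XEN_DOM0 stub of pci_xen_initial_domain() reports failure (-1) instead of being a void no-op, so initial-domain setup can bail out cleanly. A hedged usage sketch; the xen_initial_domain() gate and the warning text are illustrative, not taken from this diff:

    static int __init xen_pci_dom0_setup(void)
    {
            if (!xen_initial_domain())
                    return 0;

            if (pci_xen_initial_domain() < 0) {
                    pr_warn("xen: initial domain PCI/PIRQ setup failed\n");
                    return -ENODEV;
            }
            return 0;
    }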
diff --git a/arch/x86/include/asm/xen/trace_types.h b/arch/x86/include/asm/xen/trace_types.h new file mode 100644 index 000000000000..21e1874c0a0b --- /dev/null +++ b/arch/x86/include/asm/xen/trace_types.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef _ASM_XEN_TRACE_TYPES_H | ||
2 | #define _ASM_XEN_TRACE_TYPES_H | ||
3 | |||
4 | enum xen_mc_flush_reason { | ||
5 | XEN_MC_FL_NONE, /* explicit flush */ | ||
6 | XEN_MC_FL_BATCH, /* out of hypercall space */ | ||
7 | XEN_MC_FL_ARGS, /* out of argument space */ | ||
8 | XEN_MC_FL_CALLBACK, /* out of callback space */ | ||
9 | }; | ||
10 | |||
11 | enum xen_mc_extend_args { | ||
12 | XEN_MC_XE_OK, | ||
13 | XEN_MC_XE_BAD_OP, | ||
14 | XEN_MC_XE_NO_SPACE | ||
15 | }; | ||
16 | typedef void (*xen_mc_callback_fn_t)(void *); | ||
17 | |||
18 | #endif /* _ASM_XEN_TRACE_TYPES_H */ | ||
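The new trace_types.h only defines the enums reported by the Xen multicall tracepoints: why a pending batch had to be flushed, and whether extending the last multicall's argument list succeeded. A small illustrative sketch of picking a flush reason; the MC_BATCH/MC_ARGS limits and the helper itself are assumptions for illustration, not part of this diff:

    /* illustrative only - everything except the enum is assumed */
    static enum xen_mc_flush_reason flush_reason(unsigned int mcidx,
                                                 unsigned int argidx)
    {
            if (mcidx >= MC_BATCH)  /* no room for another multicall */
                    return XEN_MC_FL_BATCH;
            if (argidx >= MC_ARGS)  /* no room for its arguments */
                    return XEN_MC_FL_ARGS;
            return XEN_MC_FL_NONE;  /* caller requested the flush */
    }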
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90b06d4daee2..04105574c8e9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -24,17 +24,12 @@ endif | |||
24 | nostackp := $(call cc-option, -fno-stack-protector) | 24 | nostackp := $(call cc-option, -fno-stack-protector) |
25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | 25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) |
26 | CFLAGS_hpet.o := $(nostackp) | 26 | CFLAGS_hpet.o := $(nostackp) |
27 | CFLAGS_vread_tsc_64.o := $(nostackp) | ||
28 | CFLAGS_paravirt.o := $(nostackp) | 27 | CFLAGS_paravirt.o := $(nostackp) |
29 | GCOV_PROFILE_vsyscall_64.o := n | 28 | GCOV_PROFILE_vsyscall_64.o := n |
30 | GCOV_PROFILE_hpet.o := n | 29 | GCOV_PROFILE_hpet.o := n |
31 | GCOV_PROFILE_tsc.o := n | 30 | GCOV_PROFILE_tsc.o := n |
32 | GCOV_PROFILE_vread_tsc_64.o := n | ||
33 | GCOV_PROFILE_paravirt.o := n | 31 | GCOV_PROFILE_paravirt.o := n |
34 | 32 | ||
35 | # vread_tsc_64 is hot and should be fully optimized: | ||
36 | CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls | ||
37 | |||
38 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 33 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
39 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 34 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
40 | obj-y += time.o ioport.o ldt.o dumpstack.o | 35 | obj-y += time.o ioport.o ldt.o dumpstack.o |
@@ -43,7 +38,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o | |||
43 | obj-y += probe_roms.o | 38 | obj-y += probe_roms.o |
44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 39 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 40 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o | 41 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
42 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | ||
47 | obj-y += bootflag.o e820.o | 43 | obj-y += bootflag.o e820.o |
48 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 44 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
49 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
@@ -123,7 +119,6 @@ ifeq ($(CONFIG_X86_64),y) | |||
123 | 119 | ||
124 | obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o | 120 | obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o |
125 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 121 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
126 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | ||
127 | 122 | ||
128 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | 123 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o |
129 | obj-y += vsmp_64.o | 124 | obj-y += vsmp_64.o |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index ead21b663117..b4fd836e4053 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S | |||
@@ -28,6 +28,8 @@ pmode_cr3: .long 0 /* Saved %cr3 */ | |||
28 | pmode_cr4: .long 0 /* Saved %cr4 */ | 28 | pmode_cr4: .long 0 /* Saved %cr4 */ |
29 | pmode_efer: .quad 0 /* Saved EFER */ | 29 | pmode_efer: .quad 0 /* Saved EFER */ |
30 | pmode_gdt: .quad 0 | 30 | pmode_gdt: .quad 0 |
31 | pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ | ||
32 | pmode_behavior: .long 0 /* Wakeup behavior flags */ | ||
31 | realmode_flags: .long 0 | 33 | realmode_flags: .long 0 |
32 | real_magic: .long 0 | 34 | real_magic: .long 0 |
33 | trampoline_segment: .word 0 | 35 | trampoline_segment: .word 0 |
@@ -91,6 +93,18 @@ wakeup_code: | |||
91 | /* Call the C code */ | 93 | /* Call the C code */ |
92 | calll main | 94 | calll main |
93 | 95 | ||
96 | /* Restore MISC_ENABLE before entering protected mode, in case | ||
97 | BIOS decided to clear XD_DISABLE during S3. */ | ||
98 | movl pmode_behavior, %eax | ||
99 | btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax | ||
100 | jnc 1f | ||
101 | |||
102 | movl pmode_misc_en, %eax | ||
103 | movl pmode_misc_en + 4, %edx | ||
104 | movl $MSR_IA32_MISC_ENABLE, %ecx | ||
105 | wrmsr | ||
106 | 1: | ||
107 | |||
94 | /* Do any other stuff... */ | 108 | /* Do any other stuff... */ |
95 | 109 | ||
96 | #ifndef CONFIG_64BIT | 110 | #ifndef CONFIG_64BIT |
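The realmode assembly added above re-loads MSR_IA32_MISC_ENABLE on the S3 wakeup path, but only when the behavior flag recorded at suspend time indicates that a valid value was captured (BIOSes have been seen changing bits such as XD_DISABLE across S3). A C rendering of the same logic, for readability only; the real code runs as the assembly shown:

    if (header->pmode_behavior &
        (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE))
            wrmsr(MSR_IA32_MISC_ENABLE,
                  header->pmode_misc_en_low,    /* %eax */
                  header->pmode_misc_en_high);  /* %edx */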
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index e1828c07e79c..97a29e1430e3 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h | |||
@@ -21,6 +21,9 @@ struct wakeup_header { | |||
21 | u32 pmode_efer_low; /* Protected mode EFER */ | 21 | u32 pmode_efer_low; /* Protected mode EFER */ |
22 | u32 pmode_efer_high; | 22 | u32 pmode_efer_high; |
23 | u64 pmode_gdt; | 23 | u64 pmode_gdt; |
24 | u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ | ||
25 | u32 pmode_misc_en_high; | ||
26 | u32 pmode_behavior; /* Wakeup routine behavior flags */ | ||
24 | u32 realmode_flags; | 27 | u32 realmode_flags; |
25 | u32 real_magic; | 28 | u32 real_magic; |
26 | u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ | 29 | u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ |
@@ -39,4 +42,7 @@ extern struct wakeup_header wakeup_header; | |||
39 | #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 | 42 | #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 |
40 | #define WAKEUP_END_SIGNATURE 0x65a22c82 | 43 | #define WAKEUP_END_SIGNATURE 0x65a22c82 |
41 | 44 | ||
45 | /* Wakeup behavior bits */ | ||
46 | #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 | ||
47 | |||
42 | #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ | 48 | #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 18a857ba7a25..103b6ab368d3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -77,6 +77,12 @@ int acpi_suspend_lowlevel(void) | |||
77 | 77 | ||
78 | header->pmode_cr0 = read_cr0(); | 78 | header->pmode_cr0 = read_cr0(); |
79 | header->pmode_cr4 = read_cr4_safe(); | 79 | header->pmode_cr4 = read_cr4_safe(); |
80 | header->pmode_behavior = 0; | ||
81 | if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, | ||
82 | &header->pmode_misc_en_low, | ||
83 | &header->pmode_misc_en_high)) | ||
84 | header->pmode_behavior |= | ||
85 | (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE); | ||
80 | header->realmode_flags = acpi_realmode_flags; | 86 | header->realmode_flags = acpi_realmode_flags; |
81 | header->real_magic = 0x12345678; | 87 | header->real_magic = 0x12345678; |
82 | 88 | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a81f2d52f869..c63822816249 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/mce.h> | 15 | #include <asm/mce.h> |
16 | #include <asm/nmi.h> | 16 | #include <asm/nmi.h> |
17 | #include <asm/vsyscall.h> | ||
18 | #include <asm/cacheflush.h> | 17 | #include <asm/cacheflush.h> |
19 | #include <asm/tlbflush.h> | 18 | #include <asm/tlbflush.h> |
20 | #include <asm/io.h> | 19 | #include <asm/io.h> |
@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
250 | 249 | ||
251 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 250 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
252 | extern s32 __smp_locks[], __smp_locks_end[]; | 251 | extern s32 __smp_locks[], __smp_locks_end[]; |
253 | extern char __vsyscall_0; | ||
254 | void *text_poke_early(void *addr, const void *opcode, size_t len); | 252 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
255 | 253 | ||
256 | /* Replace instructions with better alternatives for this CPU type. | 254 | /* Replace instructions with better alternatives for this CPU type. |
@@ -263,6 +261,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
263 | struct alt_instr *end) | 261 | struct alt_instr *end) |
264 | { | 262 | { |
265 | struct alt_instr *a; | 263 | struct alt_instr *a; |
264 | u8 *instr, *replacement; | ||
266 | u8 insnbuf[MAX_PATCH_LEN]; | 265 | u8 insnbuf[MAX_PATCH_LEN]; |
267 | 266 | ||
268 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); | 267 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); |
@@ -276,25 +275,23 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
276 | * order. | 275 | * order. |
277 | */ | 276 | */ |
278 | for (a = start; a < end; a++) { | 277 | for (a = start; a < end; a++) { |
279 | u8 *instr = a->instr; | 278 | instr = (u8 *)&a->instr_offset + a->instr_offset; |
279 | replacement = (u8 *)&a->repl_offset + a->repl_offset; | ||
280 | BUG_ON(a->replacementlen > a->instrlen); | 280 | BUG_ON(a->replacementlen > a->instrlen); |
281 | BUG_ON(a->instrlen > sizeof(insnbuf)); | 281 | BUG_ON(a->instrlen > sizeof(insnbuf)); |
282 | BUG_ON(a->cpuid >= NCAPINTS*32); | 282 | BUG_ON(a->cpuid >= NCAPINTS*32); |
283 | if (!boot_cpu_has(a->cpuid)) | 283 | if (!boot_cpu_has(a->cpuid)) |
284 | continue; | 284 | continue; |
285 | #ifdef CONFIG_X86_64 | 285 | |
286 | /* vsyscall code is not mapped yet. resolve it manually. */ | 286 | memcpy(insnbuf, replacement, a->replacementlen); |
287 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { | 287 | |
288 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | 288 | /* 0xe8 is a relative jump; fix the offset. */ |
289 | DPRINTK("%s: vsyscall fixup: %p => %p\n", | ||
290 | __func__, a->instr, instr); | ||
291 | } | ||
292 | #endif | ||
293 | memcpy(insnbuf, a->replacement, a->replacementlen); | ||
294 | if (*insnbuf == 0xe8 && a->replacementlen == 5) | 289 | if (*insnbuf == 0xe8 && a->replacementlen == 5) |
295 | *(s32 *)(insnbuf + 1) += a->replacement - a->instr; | 290 | *(s32 *)(insnbuf + 1) += replacement - instr; |
291 | |||
296 | add_nops(insnbuf + a->replacementlen, | 292 | add_nops(insnbuf + a->replacementlen, |
297 | a->instrlen - a->replacementlen); | 293 | a->instrlen - a->replacementlen); |
294 | |||
298 | text_poke_early(instr, insnbuf, a->instrlen); | 295 | text_poke_early(instr, insnbuf, a->instrlen); |
299 | } | 296 | } |
300 | } | 297 | } |
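The apply_alternatives() rework above converts struct alt_instr from absolute instruction/replacement pointers to self-relative 32-bit offsets, which is what lets the old vsyscall address fixup go away. The two key steps, restated with comments (field names as implied by the hunk):

    /* recover absolute addresses from the self-relative offsets */
    instr       = (u8 *)&a->instr_offset + a->instr_offset;
    replacement = (u8 *)&a->repl_offset  + a->repl_offset;

    memcpy(insnbuf, replacement, a->replacementlen);

    /*
     * 0xe8 is a rel32 CALL; its displacement was encoded at the
     * replacement's location, so re-bias it for the patch site.
     */
    if (*insnbuf == 0xe8 && a->replacementlen == 5)
            *(s32 *)(insnbuf + 1) += replacement - instr;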
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c deleted file mode 100644 index 7c3a95e54ec5..000000000000 --- a/arch/x86/kernel/amd_iommu.c +++ /dev/null | |||
@@ -1,2764 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * Leo Duran <leo.duran@amd.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <linux/pci.h> | ||
21 | #include <linux/pci-ats.h> | ||
22 | #include <linux/bitmap.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/debugfs.h> | ||
25 | #include <linux/scatterlist.h> | ||
26 | #include <linux/dma-mapping.h> | ||
27 | #include <linux/iommu-helper.h> | ||
28 | #include <linux/iommu.h> | ||
29 | #include <linux/delay.h> | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/iommu.h> | ||
32 | #include <asm/gart.h> | ||
33 | #include <asm/dma.h> | ||
34 | #include <asm/amd_iommu_proto.h> | ||
35 | #include <asm/amd_iommu_types.h> | ||
36 | #include <asm/amd_iommu.h> | ||
37 | |||
38 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | ||
39 | |||
40 | #define LOOP_TIMEOUT 100000 | ||
41 | |||
42 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | ||
43 | |||
44 | /* A list of preallocated protection domains */ | ||
45 | static LIST_HEAD(iommu_pd_list); | ||
46 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | ||
47 | |||
48 | /* | ||
49 | * Domain for untranslated devices - only allocated | ||
50 | * if iommu=pt passed on kernel cmd line. | ||
51 | */ | ||
52 | static struct protection_domain *pt_domain; | ||
53 | |||
54 | static struct iommu_ops amd_iommu_ops; | ||
55 | |||
56 | /* | ||
57 | * general struct to manage commands sent to an IOMMU | ||
58 | */ | ||
59 | struct iommu_cmd { | ||
60 | u32 data[4]; | ||
61 | }; | ||
62 | |||
63 | static void update_domain(struct protection_domain *domain); | ||
64 | |||
65 | /**************************************************************************** | ||
66 | * | ||
67 | * Helper functions | ||
68 | * | ||
69 | ****************************************************************************/ | ||
70 | |||
71 | static inline u16 get_device_id(struct device *dev) | ||
72 | { | ||
73 | struct pci_dev *pdev = to_pci_dev(dev); | ||
74 | |||
75 | return calc_devid(pdev->bus->number, pdev->devfn); | ||
76 | } | ||
77 | |||
78 | static struct iommu_dev_data *get_dev_data(struct device *dev) | ||
79 | { | ||
80 | return dev->archdata.iommu; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * In this function the list of preallocated protection domains is traversed to | ||
85 | * find the domain for a specific device | ||
86 | */ | ||
87 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
88 | { | ||
89 | struct dma_ops_domain *entry, *ret = NULL; | ||
90 | unsigned long flags; | ||
91 | u16 alias = amd_iommu_alias_table[devid]; | ||
92 | |||
93 | if (list_empty(&iommu_pd_list)) | ||
94 | return NULL; | ||
95 | |||
96 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
97 | |||
98 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
99 | if (entry->target_dev == devid || | ||
100 | entry->target_dev == alias) { | ||
101 | ret = entry; | ||
102 | break; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * This function checks if the driver got a valid device from the caller to | ||
113 | * avoid dereferencing invalid pointers. | ||
114 | */ | ||
115 | static bool check_device(struct device *dev) | ||
116 | { | ||
117 | u16 devid; | ||
118 | |||
119 | if (!dev || !dev->dma_mask) | ||
120 | return false; | ||
121 | |||
122 | /* No device or no PCI device */ | ||
123 | if (dev->bus != &pci_bus_type) | ||
124 | return false; | ||
125 | |||
126 | devid = get_device_id(dev); | ||
127 | |||
128 | /* Out of our scope? */ | ||
129 | if (devid > amd_iommu_last_bdf) | ||
130 | return false; | ||
131 | |||
132 | if (amd_iommu_rlookup_table[devid] == NULL) | ||
133 | return false; | ||
134 | |||
135 | return true; | ||
136 | } | ||
137 | |||
138 | static int iommu_init_device(struct device *dev) | ||
139 | { | ||
140 | struct iommu_dev_data *dev_data; | ||
141 | struct pci_dev *pdev; | ||
142 | u16 devid, alias; | ||
143 | |||
144 | if (dev->archdata.iommu) | ||
145 | return 0; | ||
146 | |||
147 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); | ||
148 | if (!dev_data) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | dev_data->dev = dev; | ||
152 | |||
153 | devid = get_device_id(dev); | ||
154 | alias = amd_iommu_alias_table[devid]; | ||
155 | pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); | ||
156 | if (pdev) | ||
157 | dev_data->alias = &pdev->dev; | ||
158 | else { | ||
159 | kfree(dev_data); | ||
160 | return -ENOTSUPP; | ||
161 | } | ||
162 | |||
163 | atomic_set(&dev_data->bind, 0); | ||
164 | |||
165 | dev->archdata.iommu = dev_data; | ||
166 | |||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static void iommu_ignore_device(struct device *dev) | ||
172 | { | ||
173 | u16 devid, alias; | ||
174 | |||
175 | devid = get_device_id(dev); | ||
176 | alias = amd_iommu_alias_table[devid]; | ||
177 | |||
178 | memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); | ||
179 | memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); | ||
180 | |||
181 | amd_iommu_rlookup_table[devid] = NULL; | ||
182 | amd_iommu_rlookup_table[alias] = NULL; | ||
183 | } | ||
184 | |||
185 | static void iommu_uninit_device(struct device *dev) | ||
186 | { | ||
187 | kfree(dev->archdata.iommu); | ||
188 | } | ||
189 | |||
190 | void __init amd_iommu_uninit_devices(void) | ||
191 | { | ||
192 | struct pci_dev *pdev = NULL; | ||
193 | |||
194 | for_each_pci_dev(pdev) { | ||
195 | |||
196 | if (!check_device(&pdev->dev)) | ||
197 | continue; | ||
198 | |||
199 | iommu_uninit_device(&pdev->dev); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | int __init amd_iommu_init_devices(void) | ||
204 | { | ||
205 | struct pci_dev *pdev = NULL; | ||
206 | int ret = 0; | ||
207 | |||
208 | for_each_pci_dev(pdev) { | ||
209 | |||
210 | if (!check_device(&pdev->dev)) | ||
211 | continue; | ||
212 | |||
213 | ret = iommu_init_device(&pdev->dev); | ||
214 | if (ret == -ENOTSUPP) | ||
215 | iommu_ignore_device(&pdev->dev); | ||
216 | else if (ret) | ||
217 | goto out_free; | ||
218 | } | ||
219 | |||
220 | return 0; | ||
221 | |||
222 | out_free: | ||
223 | |||
224 | amd_iommu_uninit_devices(); | ||
225 | |||
226 | return ret; | ||
227 | } | ||
228 | #ifdef CONFIG_AMD_IOMMU_STATS | ||
229 | |||
230 | /* | ||
231 | * Initialization code for statistics collection | ||
232 | */ | ||
233 | |||
234 | DECLARE_STATS_COUNTER(compl_wait); | ||
235 | DECLARE_STATS_COUNTER(cnt_map_single); | ||
236 | DECLARE_STATS_COUNTER(cnt_unmap_single); | ||
237 | DECLARE_STATS_COUNTER(cnt_map_sg); | ||
238 | DECLARE_STATS_COUNTER(cnt_unmap_sg); | ||
239 | DECLARE_STATS_COUNTER(cnt_alloc_coherent); | ||
240 | DECLARE_STATS_COUNTER(cnt_free_coherent); | ||
241 | DECLARE_STATS_COUNTER(cross_page); | ||
242 | DECLARE_STATS_COUNTER(domain_flush_single); | ||
243 | DECLARE_STATS_COUNTER(domain_flush_all); | ||
244 | DECLARE_STATS_COUNTER(alloced_io_mem); | ||
245 | DECLARE_STATS_COUNTER(total_map_requests); | ||
246 | |||
247 | static struct dentry *stats_dir; | ||
248 | static struct dentry *de_fflush; | ||
249 | |||
250 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | ||
251 | { | ||
252 | if (stats_dir == NULL) | ||
253 | return; | ||
254 | |||
255 | cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, | ||
256 | &cnt->value); | ||
257 | } | ||
258 | |||
259 | static void amd_iommu_stats_init(void) | ||
260 | { | ||
261 | stats_dir = debugfs_create_dir("amd-iommu", NULL); | ||
262 | if (stats_dir == NULL) | ||
263 | return; | ||
264 | |||
265 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | ||
266 | (u32 *)&amd_iommu_unmap_flush); | ||
267 | |||
268 | amd_iommu_stats_add(&compl_wait); | ||
269 | amd_iommu_stats_add(&cnt_map_single); | ||
270 | amd_iommu_stats_add(&cnt_unmap_single); | ||
271 | amd_iommu_stats_add(&cnt_map_sg); | ||
272 | amd_iommu_stats_add(&cnt_unmap_sg); | ||
273 | amd_iommu_stats_add(&cnt_alloc_coherent); | ||
274 | amd_iommu_stats_add(&cnt_free_coherent); | ||
275 | amd_iommu_stats_add(&cross_page); | ||
276 | amd_iommu_stats_add(&domain_flush_single); | ||
277 | amd_iommu_stats_add(&domain_flush_all); | ||
278 | amd_iommu_stats_add(&alloced_io_mem); | ||
279 | amd_iommu_stats_add(&total_map_requests); | ||
280 | } | ||
281 | |||
282 | #endif | ||
283 | |||
284 | /**************************************************************************** | ||
285 | * | ||
286 | * Interrupt handling functions | ||
287 | * | ||
288 | ****************************************************************************/ | ||
289 | |||
290 | static void dump_dte_entry(u16 devid) | ||
291 | { | ||
292 | int i; | ||
293 | |||
294 | for (i = 0; i < 8; ++i) | ||
295 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
296 | amd_iommu_dev_table[devid].data[i]); | ||
297 | } | ||
298 | |||
299 | static void dump_command(unsigned long phys_addr) | ||
300 | { | ||
301 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
302 | int i; | ||
303 | |||
304 | for (i = 0; i < 4; ++i) | ||
305 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
306 | } | ||
307 | |||
308 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
309 | { | ||
310 | u32 *event = __evt; | ||
311 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | ||
312 | int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; | ||
313 | int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; | ||
314 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | ||
315 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | ||
316 | |||
317 | printk(KERN_ERR "AMD-Vi: Event logged ["); | ||
318 | |||
319 | switch (type) { | ||
320 | case EVENT_TYPE_ILL_DEV: | ||
321 | printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " | ||
322 | "address=0x%016llx flags=0x%04x]\n", | ||
323 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
324 | address, flags); | ||
325 | dump_dte_entry(devid); | ||
326 | break; | ||
327 | case EVENT_TYPE_IO_FAULT: | ||
328 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | ||
329 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
330 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
331 | domid, address, flags); | ||
332 | break; | ||
333 | case EVENT_TYPE_DEV_TAB_ERR: | ||
334 | printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
335 | "address=0x%016llx flags=0x%04x]\n", | ||
336 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
337 | address, flags); | ||
338 | break; | ||
339 | case EVENT_TYPE_PAGE_TAB_ERR: | ||
340 | printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
341 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
342 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
343 | domid, address, flags); | ||
344 | break; | ||
345 | case EVENT_TYPE_ILL_CMD: | ||
346 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | ||
347 | dump_command(address); | ||
348 | break; | ||
349 | case EVENT_TYPE_CMD_HARD_ERR: | ||
350 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | ||
351 | "flags=0x%04x]\n", address, flags); | ||
352 | break; | ||
353 | case EVENT_TYPE_IOTLB_INV_TO: | ||
354 | printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " | ||
355 | "address=0x%016llx]\n", | ||
356 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
357 | address); | ||
358 | break; | ||
359 | case EVENT_TYPE_INV_DEV_REQ: | ||
360 | printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " | ||
361 | "address=0x%016llx flags=0x%04x]\n", | ||
362 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
363 | address, flags); | ||
364 | break; | ||
365 | default: | ||
366 | printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); | ||
367 | } | ||
368 | } | ||
369 | |||
370 | static void iommu_poll_events(struct amd_iommu *iommu) | ||
371 | { | ||
372 | u32 head, tail; | ||
373 | unsigned long flags; | ||
374 | |||
375 | spin_lock_irqsave(&iommu->lock, flags); | ||
376 | |||
377 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
378 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
379 | |||
380 | while (head != tail) { | ||
381 | iommu_print_event(iommu, iommu->evt_buf + head); | ||
382 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | ||
383 | } | ||
384 | |||
385 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
386 | |||
387 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
388 | } | ||
389 | |||
390 | irqreturn_t amd_iommu_int_thread(int irq, void *data) | ||
391 | { | ||
392 | struct amd_iommu *iommu; | ||
393 | |||
394 | for_each_iommu(iommu) | ||
395 | iommu_poll_events(iommu); | ||
396 | |||
397 | return IRQ_HANDLED; | ||
398 | } | ||
399 | |||
400 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
401 | { | ||
402 | return IRQ_WAKE_THREAD; | ||
403 | } | ||
404 | |||
405 | /**************************************************************************** | ||
406 | * | ||
407 | * IOMMU command queuing functions | ||
408 | * | ||
409 | ****************************************************************************/ | ||
410 | |||
411 | static int wait_on_sem(volatile u64 *sem) | ||
412 | { | ||
413 | int i = 0; | ||
414 | |||
415 | while (*sem == 0 && i < LOOP_TIMEOUT) { | ||
416 | udelay(1); | ||
417 | i += 1; | ||
418 | } | ||
419 | |||
420 | if (i == LOOP_TIMEOUT) { | ||
421 | pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); | ||
422 | return -EIO; | ||
423 | } | ||
424 | |||
425 | return 0; | ||
426 | } | ||
427 | |||
428 | static void copy_cmd_to_buffer(struct amd_iommu *iommu, | ||
429 | struct iommu_cmd *cmd, | ||
430 | u32 tail) | ||
431 | { | ||
432 | u8 *target; | ||
433 | |||
434 | target = iommu->cmd_buf + tail; | ||
435 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | ||
436 | |||
437 | /* Copy command to buffer */ | ||
438 | memcpy(target, cmd, sizeof(*cmd)); | ||
439 | |||
440 | /* Tell the IOMMU about it */ | ||
441 | writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
442 | } | ||
443 | |||
444 | static void build_completion_wait(struct iommu_cmd *cmd, u64 address) | ||
445 | { | ||
446 | WARN_ON(address & 0x7ULL); | ||
447 | |||
448 | memset(cmd, 0, sizeof(*cmd)); | ||
449 | cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; | ||
450 | cmd->data[1] = upper_32_bits(__pa(address)); | ||
451 | cmd->data[2] = 1; | ||
452 | CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); | ||
453 | } | ||
454 | |||
455 | static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) | ||
456 | { | ||
457 | memset(cmd, 0, sizeof(*cmd)); | ||
458 | cmd->data[0] = devid; | ||
459 | CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); | ||
460 | } | ||
461 | |||
462 | static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
463 | size_t size, u16 domid, int pde) | ||
464 | { | ||
465 | u64 pages; | ||
466 | int s; | ||
467 | |||
468 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
469 | s = 0; | ||
470 | |||
471 | if (pages > 1) { | ||
472 | /* | ||
473 | * If we have to flush more than one page, flush all | ||
474 | * TLB entries for this domain | ||
475 | */ | ||
476 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
477 | s = 1; | ||
478 | } | ||
479 | |||
480 | address &= PAGE_MASK; | ||
481 | |||
482 | memset(cmd, 0, sizeof(*cmd)); | ||
483 | cmd->data[1] |= domid; | ||
484 | cmd->data[2] = lower_32_bits(address); | ||
485 | cmd->data[3] = upper_32_bits(address); | ||
486 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
487 | if (s) /* size bit - we flush more than one 4kb page */ | ||
488 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
489 | if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ | ||
490 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
491 | } | ||
492 | |||
493 | static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, | ||
494 | u64 address, size_t size) | ||
495 | { | ||
496 | u64 pages; | ||
497 | int s; | ||
498 | |||
499 | pages = iommu_num_pages(address, size, PAGE_SIZE); | ||
500 | s = 0; | ||
501 | |||
502 | if (pages > 1) { | ||
503 | /* | ||
504 | * If we have to flush more than one page, flush all | ||
505 | * TLB entries for this domain | ||
506 | */ | ||
507 | address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
508 | s = 1; | ||
509 | } | ||
510 | |||
511 | address &= PAGE_MASK; | ||
512 | |||
513 | memset(cmd, 0, sizeof(*cmd)); | ||
514 | cmd->data[0] = devid; | ||
515 | cmd->data[0] |= (qdep & 0xff) << 24; | ||
516 | cmd->data[1] = devid; | ||
517 | cmd->data[2] = lower_32_bits(address); | ||
518 | cmd->data[3] = upper_32_bits(address); | ||
519 | CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); | ||
520 | if (s) | ||
521 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
522 | } | ||
523 | |||
524 | static void build_inv_all(struct iommu_cmd *cmd) | ||
525 | { | ||
526 | memset(cmd, 0, sizeof(*cmd)); | ||
527 | CMD_SET_TYPE(cmd, CMD_INV_ALL); | ||
528 | } | ||
529 | |||
530 | /* | ||
531 | * Writes the command to the IOMMU's command buffer and informs the | ||
532 | * hardware about the new command. | ||
533 | */ | ||
534 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
535 | { | ||
536 | u32 left, tail, head, next_tail; | ||
537 | unsigned long flags; | ||
538 | |||
539 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
540 | |||
541 | again: | ||
542 | spin_lock_irqsave(&iommu->lock, flags); | ||
543 | |||
544 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
545 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
546 | next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | ||
547 | left = (head - next_tail) % iommu->cmd_buf_size; | ||
548 | |||
549 | if (left <= 2) { | ||
550 | struct iommu_cmd sync_cmd; | ||
551 | volatile u64 sem = 0; | ||
552 | int ret; | ||
553 | |||
554 | build_completion_wait(&sync_cmd, (u64)&sem); | ||
555 | copy_cmd_to_buffer(iommu, &sync_cmd, tail); | ||
556 | |||
557 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
558 | |||
559 | if ((ret = wait_on_sem(&sem)) != 0) | ||
560 | return ret; | ||
561 | |||
562 | goto again; | ||
563 | } | ||
564 | |||
565 | copy_cmd_to_buffer(iommu, cmd, tail); | ||
566 | |||
567 | /* We need to sync now to make sure all commands are processed */ | ||
568 | iommu->need_sync = true; | ||
569 | |||
570 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
571 | |||
572 | return 0; | ||
573 | } | ||
574 | |||
575 | /* | ||
576 | * This function queues a completion wait command into the command | ||
577 | * buffer of an IOMMU | ||
578 | */ | ||
579 | static int iommu_completion_wait(struct amd_iommu *iommu) | ||
580 | { | ||
581 | struct iommu_cmd cmd; | ||
582 | volatile u64 sem = 0; | ||
583 | int ret; | ||
584 | |||
585 | if (!iommu->need_sync) | ||
586 | return 0; | ||
587 | |||
588 | build_completion_wait(&cmd, (u64)&sem); | ||
589 | |||
590 | ret = iommu_queue_command(iommu, &cmd); | ||
591 | if (ret) | ||
592 | return ret; | ||
593 | |||
594 | return wait_on_sem(&sem); | ||
595 | } | ||
596 | |||
597 | static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) | ||
598 | { | ||
599 | struct iommu_cmd cmd; | ||
600 | |||
601 | build_inv_dte(&cmd, devid); | ||
602 | |||
603 | return iommu_queue_command(iommu, &cmd); | ||
604 | } | ||
605 | |||
606 | static void iommu_flush_dte_all(struct amd_iommu *iommu) | ||
607 | { | ||
608 | u32 devid; | ||
609 | |||
610 | for (devid = 0; devid <= 0xffff; ++devid) | ||
611 | iommu_flush_dte(iommu, devid); | ||
612 | |||
613 | iommu_completion_wait(iommu); | ||
614 | } | ||
615 | |||
616 | /* | ||
617 | * This function uses heavy locking and may disable irqs for some time. But | ||
618 | * this is no issue because it is only called during resume. | ||
619 | */ | ||
620 | static void iommu_flush_tlb_all(struct amd_iommu *iommu) | ||
621 | { | ||
622 | u32 dom_id; | ||
623 | |||
624 | for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { | ||
625 | struct iommu_cmd cmd; | ||
626 | build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
627 | dom_id, 1); | ||
628 | iommu_queue_command(iommu, &cmd); | ||
629 | } | ||
630 | |||
631 | iommu_completion_wait(iommu); | ||
632 | } | ||
633 | |||
634 | static void iommu_flush_all(struct amd_iommu *iommu) | ||
635 | { | ||
636 | struct iommu_cmd cmd; | ||
637 | |||
638 | build_inv_all(&cmd); | ||
639 | |||
640 | iommu_queue_command(iommu, &cmd); | ||
641 | iommu_completion_wait(iommu); | ||
642 | } | ||
643 | |||
644 | void iommu_flush_all_caches(struct amd_iommu *iommu) | ||
645 | { | ||
646 | if (iommu_feature(iommu, FEATURE_IA)) { | ||
647 | iommu_flush_all(iommu); | ||
648 | } else { | ||
649 | iommu_flush_dte_all(iommu); | ||
650 | iommu_flush_tlb_all(iommu); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | /* | ||
655 | * Command send function for flushing on-device TLB | ||
656 | */ | ||
657 | static int device_flush_iotlb(struct device *dev, u64 address, size_t size) | ||
658 | { | ||
659 | struct pci_dev *pdev = to_pci_dev(dev); | ||
660 | struct amd_iommu *iommu; | ||
661 | struct iommu_cmd cmd; | ||
662 | u16 devid; | ||
663 | int qdep; | ||
664 | |||
665 | qdep = pci_ats_queue_depth(pdev); | ||
666 | devid = get_device_id(dev); | ||
667 | iommu = amd_iommu_rlookup_table[devid]; | ||
668 | |||
669 | build_inv_iotlb_pages(&cmd, devid, qdep, address, size); | ||
670 | |||
671 | return iommu_queue_command(iommu, &cmd); | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * Command send function for invalidating a device table entry | ||
676 | */ | ||
677 | static int device_flush_dte(struct device *dev) | ||
678 | { | ||
679 | struct amd_iommu *iommu; | ||
680 | struct pci_dev *pdev; | ||
681 | u16 devid; | ||
682 | int ret; | ||
683 | |||
684 | pdev = to_pci_dev(dev); | ||
685 | devid = get_device_id(dev); | ||
686 | iommu = amd_iommu_rlookup_table[devid]; | ||
687 | |||
688 | ret = iommu_flush_dte(iommu, devid); | ||
689 | if (ret) | ||
690 | return ret; | ||
691 | |||
692 | if (pci_ats_enabled(pdev)) | ||
693 | ret = device_flush_iotlb(dev, 0, ~0UL); | ||
694 | |||
695 | return ret; | ||
696 | } | ||
697 | |||
698 | /* | ||
699 | * TLB invalidation function which is called from the mapping functions. | ||
700 | * It invalidates a single PTE if the range to flush is within a single | ||
701 | * page. Otherwise it flushes the whole TLB of the IOMMU. | ||
702 | */ | ||
703 | static void __domain_flush_pages(struct protection_domain *domain, | ||
704 | u64 address, size_t size, int pde) | ||
705 | { | ||
706 | struct iommu_dev_data *dev_data; | ||
707 | struct iommu_cmd cmd; | ||
708 | int ret = 0, i; | ||
709 | |||
710 | build_inv_iommu_pages(&cmd, address, size, domain->id, pde); | ||
711 | |||
712 | for (i = 0; i < amd_iommus_present; ++i) { | ||
713 | if (!domain->dev_iommu[i]) | ||
714 | continue; | ||
715 | |||
716 | /* | ||
717 | * Devices of this domain are behind this IOMMU | ||
718 | * We need a TLB flush | ||
719 | */ | ||
720 | ret |= iommu_queue_command(amd_iommus[i], &cmd); | ||
721 | } | ||
722 | |||
723 | list_for_each_entry(dev_data, &domain->dev_list, list) { | ||
724 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
725 | |||
726 | if (!pci_ats_enabled(pdev)) | ||
727 | continue; | ||
728 | |||
729 | ret |= device_flush_iotlb(dev_data->dev, address, size); | ||
730 | } | ||
731 | |||
732 | WARN_ON(ret); | ||
733 | } | ||
734 | |||
735 | static void domain_flush_pages(struct protection_domain *domain, | ||
736 | u64 address, size_t size) | ||
737 | { | ||
738 | __domain_flush_pages(domain, address, size, 0); | ||
739 | } | ||
740 | |||
741 | /* Flush the whole IO/TLB for a given protection domain */ | ||
742 | static void domain_flush_tlb(struct protection_domain *domain) | ||
743 | { | ||
744 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | ||
745 | } | ||
746 | |||
747 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | ||
748 | static void domain_flush_tlb_pde(struct protection_domain *domain) | ||
749 | { | ||
750 | __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); | ||
751 | } | ||
752 | |||
753 | static void domain_flush_complete(struct protection_domain *domain) | ||
754 | { | ||
755 | int i; | ||
756 | |||
757 | for (i = 0; i < amd_iommus_present; ++i) { | ||
758 | if (!domain->dev_iommu[i]) | ||
759 | continue; | ||
760 | |||
761 | /* | ||
762 | * Devices of this domain are behind this IOMMU | ||
763 | * We need to wait for completion of all commands. | ||
764 | */ | ||
765 | iommu_completion_wait(amd_iommus[i]); | ||
766 | } | ||
767 | } | ||
768 | |||
769 | |||
770 | /* | ||
771 | * This function flushes the DTEs for all devices in the domain | ||
772 | */ | ||
773 | static void domain_flush_devices(struct protection_domain *domain) | ||
774 | { | ||
775 | struct iommu_dev_data *dev_data; | ||
776 | unsigned long flags; | ||
777 | |||
778 | spin_lock_irqsave(&domain->lock, flags); | ||
779 | |||
780 | list_for_each_entry(dev_data, &domain->dev_list, list) | ||
781 | device_flush_dte(dev_data->dev); | ||
782 | |||
783 | spin_unlock_irqrestore(&domain->lock, flags); | ||
784 | } | ||
785 | |||
786 | /**************************************************************************** | ||
787 | * | ||
788 | * The functions below are used to create the page table mappings for | ||
789 | * unity mapped regions. | ||
790 | * | ||
791 | ****************************************************************************/ | ||
792 | |||
793 | /* | ||
794 | * This function is used to add another level to an IO page table. Adding | ||
795 | * another level increases the size of the address space by 9 bits to a size up | ||
796 | * to 64 bits. | ||
797 | */ | ||
798 | static bool increase_address_space(struct protection_domain *domain, | ||
799 | gfp_t gfp) | ||
800 | { | ||
801 | u64 *pte; | ||
802 | |||
803 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
804 | /* address space already 64 bit large */ | ||
805 | return false; | ||
806 | |||
807 | pte = (void *)get_zeroed_page(gfp); | ||
808 | if (!pte) | ||
809 | return false; | ||
810 | |||
811 | *pte = PM_LEVEL_PDE(domain->mode, | ||
812 | virt_to_phys(domain->pt_root)); | ||
813 | domain->pt_root = pte; | ||
814 | domain->mode += 1; | ||
815 | domain->updated = true; | ||
816 | |||
817 | return true; | ||
818 | } | ||
819 | |||
820 | static u64 *alloc_pte(struct protection_domain *domain, | ||
821 | unsigned long address, | ||
822 | unsigned long page_size, | ||
823 | u64 **pte_page, | ||
824 | gfp_t gfp) | ||
825 | { | ||
826 | int level, end_lvl; | ||
827 | u64 *pte, *page; | ||
828 | |||
829 | BUG_ON(!is_power_of_2(page_size)); | ||
830 | |||
831 | while (address > PM_LEVEL_SIZE(domain->mode)) | ||
832 | increase_address_space(domain, gfp); | ||
833 | |||
834 | level = domain->mode - 1; | ||
835 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
836 | address = PAGE_SIZE_ALIGN(address, page_size); | ||
837 | end_lvl = PAGE_SIZE_LEVEL(page_size); | ||
838 | |||
839 | while (level > end_lvl) { | ||
840 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
841 | page = (u64 *)get_zeroed_page(gfp); | ||
842 | if (!page) | ||
843 | return NULL; | ||
844 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
845 | } | ||
846 | |||
847 | /* No level skipping support yet */ | ||
848 | if (PM_PTE_LEVEL(*pte) != level) | ||
849 | return NULL; | ||
850 | |||
851 | level -= 1; | ||
852 | |||
853 | pte = IOMMU_PTE_PAGE(*pte); | ||
854 | |||
855 | if (pte_page && level == end_lvl) | ||
856 | *pte_page = pte; | ||
857 | |||
858 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
859 | } | ||
860 | |||
861 | return pte; | ||
862 | } | ||
863 | |||
864 | /* | ||
865 | * This function checks if there is a PTE for a given dma address. If | ||
866 | * there is one, it returns the pointer to it. | ||
867 | */ | ||
868 | static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) | ||
869 | { | ||
870 | int level; | ||
871 | u64 *pte; | ||
872 | |||
873 | if (address > PM_LEVEL_SIZE(domain->mode)) | ||
874 | return NULL; | ||
875 | |||
876 | level = domain->mode - 1; | ||
877 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
878 | |||
879 | while (level > 0) { | ||
880 | |||
881 | /* Not Present */ | ||
882 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
883 | return NULL; | ||
884 | |||
885 | /* Large PTE */ | ||
886 | if (PM_PTE_LEVEL(*pte) == 0x07) { | ||
887 | unsigned long pte_mask, __pte; | ||
888 | |||
889 | /* | ||
890 | * If we have a series of large PTEs, make | ||
891 | * sure to return a pointer to the first one. | ||
892 | */ | ||
893 | pte_mask = PTE_PAGE_SIZE(*pte); | ||
894 | pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); | ||
895 | __pte = ((unsigned long)pte) & pte_mask; | ||
896 | |||
897 | return (u64 *)__pte; | ||
898 | } | ||
899 | |||
900 | /* No level skipping support yet */ | ||
901 | if (PM_PTE_LEVEL(*pte) != level) | ||
902 | return NULL; | ||
903 | |||
904 | level -= 1; | ||
905 | |||
906 | /* Walk to the next level */ | ||
907 | pte = IOMMU_PTE_PAGE(*pte); | ||
908 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
909 | } | ||
910 | |||
911 | return pte; | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Generic mapping function. It maps a physical address into a DMA | ||
916 | * address space. It allocates the page table pages if necessary. | ||
917 | * In the future it can be extended to a generic mapping function | ||
918 | * supporting all features of AMD IOMMU page tables like level skipping | ||
919 | * and full 64 bit address spaces. | ||
920 | */ | ||
921 | static int iommu_map_page(struct protection_domain *dom, | ||
922 | unsigned long bus_addr, | ||
923 | unsigned long phys_addr, | ||
924 | int prot, | ||
925 | unsigned long page_size) | ||
926 | { | ||
927 | u64 __pte, *pte; | ||
928 | int i, count; | ||
929 | |||
930 | if (!(prot & IOMMU_PROT_MASK)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | bus_addr = PAGE_ALIGN(bus_addr); | ||
934 | phys_addr = PAGE_ALIGN(phys_addr); | ||
935 | count = PAGE_SIZE_PTE_COUNT(page_size); | ||
936 | pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); | ||
937 | |||
938 | for (i = 0; i < count; ++i) | ||
939 | if (IOMMU_PTE_PRESENT(pte[i])) | ||
940 | return -EBUSY; | ||
941 | |||
942 | if (page_size > PAGE_SIZE) { | ||
943 | __pte = PAGE_SIZE_PTE(phys_addr, page_size); | ||
944 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
945 | } else | ||
946 | __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
947 | |||
948 | if (prot & IOMMU_PROT_IR) | ||
949 | __pte |= IOMMU_PTE_IR; | ||
950 | if (prot & IOMMU_PROT_IW) | ||
951 | __pte |= IOMMU_PTE_IW; | ||
952 | |||
953 | for (i = 0; i < count; ++i) | ||
954 | pte[i] = __pte; | ||
955 | |||
956 | update_domain(dom); | ||
957 | |||
958 | return 0; | ||
959 | } | ||
960 | |||
961 | static unsigned long iommu_unmap_page(struct protection_domain *dom, | ||
962 | unsigned long bus_addr, | ||
963 | unsigned long page_size) | ||
964 | { | ||
965 | unsigned long long unmap_size, unmapped; | ||
966 | u64 *pte; | ||
967 | |||
968 | BUG_ON(!is_power_of_2(page_size)); | ||
969 | |||
970 | unmapped = 0; | ||
971 | |||
972 | while (unmapped < page_size) { | ||
973 | |||
974 | pte = fetch_pte(dom, bus_addr); | ||
975 | |||
976 | if (!pte) { | ||
977 | /* | ||
978 | * No PTE for this address | ||
979 | * move forward in 4kb steps | ||
980 | */ | ||
981 | unmap_size = PAGE_SIZE; | ||
982 | } else if (PM_PTE_LEVEL(*pte) == 0) { | ||
983 | /* 4kb PTE found for this address */ | ||
984 | unmap_size = PAGE_SIZE; | ||
985 | *pte = 0ULL; | ||
986 | } else { | ||
987 | int count, i; | ||
988 | |||
989 | /* Large PTE found which maps this address */ | ||
990 | unmap_size = PTE_PAGE_SIZE(*pte); | ||
991 | count = PAGE_SIZE_PTE_COUNT(unmap_size); | ||
992 | for (i = 0; i < count; i++) | ||
993 | pte[i] = 0ULL; | ||
994 | } | ||
995 | |||
996 | bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; | ||
997 | unmapped += unmap_size; | ||
998 | } | ||
999 | |||
1000 | BUG_ON(!is_power_of_2(unmapped)); | ||
1001 | |||
1002 | return unmapped; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * This function checks if a specific unity mapping entry is needed for | ||
1007 | * this specific IOMMU. | ||
1008 | */ | ||
1009 | static int iommu_for_unity_map(struct amd_iommu *iommu, | ||
1010 | struct unity_map_entry *entry) | ||
1011 | { | ||
1012 | u16 bdf, i; | ||
1013 | |||
1014 | for (i = entry->devid_start; i <= entry->devid_end; ++i) { | ||
1015 | bdf = amd_iommu_alias_table[i]; | ||
1016 | if (amd_iommu_rlookup_table[bdf] == iommu) | ||
1017 | return 1; | ||
1018 | } | ||
1019 | |||
1020 | return 0; | ||
1021 | } | ||
1022 | |||
1023 | /* | ||
1024 | * This function actually applies the mapping to the page table of the | ||
1025 | * dma_ops domain. | ||
1026 | */ | ||
1027 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | ||
1028 | struct unity_map_entry *e) | ||
1029 | { | ||
1030 | u64 addr; | ||
1031 | int ret; | ||
1032 | |||
1033 | for (addr = e->address_start; addr < e->address_end; | ||
1034 | addr += PAGE_SIZE) { | ||
1035 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, | ||
1036 | PAGE_SIZE); | ||
1037 | if (ret) | ||
1038 | return ret; | ||
1039 | /* | ||
1040 | * if unity mapping is in aperture range mark the page | ||
1041 | * as allocated in the aperture | ||
1042 | */ | ||
1043 | if (addr < dma_dom->aperture_size) | ||
1044 | __set_bit(addr >> PAGE_SHIFT, | ||
1045 | dma_dom->aperture[0]->bitmap); | ||
1046 | } | ||
1047 | |||
1048 | return 0; | ||
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | * Init the unity mappings for a specific IOMMU in the system | ||
1053 | * | ||
1054 | * Basically iterates over all unity mapping entries and applies them to | ||
1055 | * the default DMA domain of that IOMMU if necessary. | ||
1056 | */ | ||
1057 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
1058 | { | ||
1059 | struct unity_map_entry *entry; | ||
1060 | int ret; | ||
1061 | |||
1062 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
1063 | if (!iommu_for_unity_map(iommu, entry)) | ||
1064 | continue; | ||
1065 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
1066 | if (ret) | ||
1067 | return ret; | ||
1068 | } | ||
1069 | |||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | /* | ||
1074 | * Inits the unity mappings required for a specific device | ||
1075 | */ | ||
1076 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | ||
1077 | u16 devid) | ||
1078 | { | ||
1079 | struct unity_map_entry *e; | ||
1080 | int ret; | ||
1081 | |||
1082 | list_for_each_entry(e, &amd_iommu_unity_map, list) { | ||
1083 | if (!(devid >= e->devid_start && devid <= e->devid_end)) | ||
1084 | continue; | ||
1085 | ret = dma_ops_unity_map(dma_dom, e); | ||
1086 | if (ret) | ||
1087 | return ret; | ||
1088 | } | ||
1089 | |||
1090 | return 0; | ||
1091 | } | ||
1092 | |||
1093 | /**************************************************************************** | ||
1094 | * | ||
1095 | * The next functions belong to the address allocator for the dma_ops | ||
1096 | * interface functions. They work like the allocators in the other IOMMU | ||
1097 | * drivers. It's basically a bitmap which marks the allocated pages in | ||
1098 | * the aperture. Maybe it could be enhanced in the future to a more | ||
1099 | * efficient allocator. | ||
1100 | * | ||
1101 | ****************************************************************************/ | ||
1102 | |||
1103 | /* | ||
1104 | * The address allocator core functions. | ||
1105 | * | ||
1106 | * called with domain->lock held | ||
1107 | */ | ||
1108 | |||
1109 | /* | ||
1110 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
1111 | * ranges). | ||
1112 | */ | ||
1113 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
1114 | unsigned long start_page, | ||
1115 | unsigned int pages) | ||
1116 | { | ||
1117 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; | ||
1118 | |||
1119 | if (start_page + pages > last_page) | ||
1120 | pages = last_page - start_page; | ||
1121 | |||
1122 | for (i = start_page; i < start_page + pages; ++i) { | ||
1123 | int index = i / APERTURE_RANGE_PAGES; | ||
1124 | int page = i % APERTURE_RANGE_PAGES; | ||
1125 | __set_bit(page, dom->aperture[index]->bitmap); | ||
1126 | } | ||
1127 | } | ||
1128 | |||
1129 | /* | ||
1130 | * This function is used to add a new aperture range to an existing | ||
1131 | * aperture in case of dma_ops domain allocation or address allocation | ||
1132 | * failure. | ||
1133 | */ | ||
1134 | static int alloc_new_range(struct dma_ops_domain *dma_dom, | ||
1135 | bool populate, gfp_t gfp) | ||
1136 | { | ||
1137 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | ||
1138 | struct amd_iommu *iommu; | ||
1139 | unsigned long i; | ||
1140 | |||
1141 | #ifdef CONFIG_IOMMU_STRESS | ||
1142 | populate = false; | ||
1143 | #endif | ||
1144 | |||
1145 | if (index >= APERTURE_MAX_RANGES) | ||
1146 | return -ENOMEM; | ||
1147 | |||
1148 | dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); | ||
1149 | if (!dma_dom->aperture[index]) | ||
1150 | return -ENOMEM; | ||
1151 | |||
1152 | dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); | ||
1153 | if (!dma_dom->aperture[index]->bitmap) | ||
1154 | goto out_free; | ||
1155 | |||
1156 | dma_dom->aperture[index]->offset = dma_dom->aperture_size; | ||
1157 | |||
1158 | if (populate) { | ||
1159 | unsigned long address = dma_dom->aperture_size; | ||
1160 | int i, num_ptes = APERTURE_RANGE_PAGES / 512; | ||
1161 | u64 *pte, *pte_page; | ||
1162 | |||
1163 | for (i = 0; i < num_ptes; ++i) { | ||
1164 | pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, | ||
1165 | &pte_page, gfp); | ||
1166 | if (!pte) | ||
1167 | goto out_free; | ||
1168 | |||
1169 | dma_dom->aperture[index]->pte_pages[i] = pte_page; | ||
1170 | |||
1171 | address += APERTURE_RANGE_SIZE / 64; | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | ||
1176 | |||
1177 | /* Initialize the exclusion range if necessary */ | ||
1178 | for_each_iommu(iommu) { | ||
1179 | if (iommu->exclusion_start && | ||
1180 | iommu->exclusion_start >= dma_dom->aperture[index]->offset | ||
1181 | && iommu->exclusion_start < dma_dom->aperture_size) { | ||
1182 | unsigned long startpage; | ||
1183 | int pages = iommu_num_pages(iommu->exclusion_start, | ||
1184 | iommu->exclusion_length, | ||
1185 | PAGE_SIZE); | ||
1186 | startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
1187 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
1188 | } | ||
1189 | } | ||
1190 | |||
1191 | /* | ||
1192 | * Check for areas already mapped as present in the new aperture | ||
1193 | * range and mark those pages as reserved in the allocator. Such | ||
1194 | * mappings may already exist as a result of requested unity | ||
1195 | * mappings for devices. | ||
1196 | */ | ||
1197 | for (i = dma_dom->aperture[index]->offset; | ||
1198 | i < dma_dom->aperture_size; | ||
1199 | i += PAGE_SIZE) { | ||
1200 | u64 *pte = fetch_pte(&dma_dom->domain, i); | ||
1201 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | ||
1202 | continue; | ||
1203 | |||
1204 | dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1); | ||
1205 | } | ||
1206 | |||
1207 | update_domain(&dma_dom->domain); | ||
1208 | |||
1209 | return 0; | ||
1210 | |||
1211 | out_free: | ||
1212 | update_domain(&dma_dom->domain); | ||
1213 | |||
1214 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | ||
1215 | |||
1216 | kfree(dma_dom->aperture[index]); | ||
1217 | dma_dom->aperture[index] = NULL; | ||
1218 | |||
1219 | return -ENOMEM; | ||
1220 | } | ||
1221 | |||
1222 | static unsigned long dma_ops_area_alloc(struct device *dev, | ||
1223 | struct dma_ops_domain *dom, | ||
1224 | unsigned int pages, | ||
1225 | unsigned long align_mask, | ||
1226 | u64 dma_mask, | ||
1227 | unsigned long start) | ||
1228 | { | ||
1229 | unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; | ||
1230 | int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; | ||
1231 | int i = start >> APERTURE_RANGE_SHIFT; | ||
1232 | unsigned long boundary_size; | ||
1233 | unsigned long address = -1; | ||
1234 | unsigned long limit; | ||
1235 | |||
1236 | next_bit >>= PAGE_SHIFT; | ||
1237 | |||
1238 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | ||
1239 | PAGE_SIZE) >> PAGE_SHIFT; | ||
1240 | |||
1241 | for (;i < max_index; ++i) { | ||
1242 | unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; | ||
1243 | |||
1244 | if (dom->aperture[i]->offset >= dma_mask) | ||
1245 | break; | ||
1246 | |||
1247 | limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, | ||
1248 | dma_mask >> PAGE_SHIFT); | ||
1249 | |||
1250 | address = iommu_area_alloc(dom->aperture[i]->bitmap, | ||
1251 | limit, next_bit, pages, 0, | ||
1252 | boundary_size, align_mask); | ||
1253 | if (address != -1) { | ||
1254 | address = dom->aperture[i]->offset + | ||
1255 | (address << PAGE_SHIFT); | ||
1256 | dom->next_address = address + (pages << PAGE_SHIFT); | ||
1257 | break; | ||
1258 | } | ||
1259 | |||
1260 | next_bit = 0; | ||
1261 | } | ||
1262 | |||
1263 | return address; | ||
1264 | } | ||
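/*
 * [Editor's sketch, not driver code.] dma_ops_area_alloc() above hands
 * iommu_area_alloc() a boundary_size so that no allocation crosses a DMA
 * segment boundary. The underlying check looks roughly like this (a
 * simplified model: boundary_size is a power of two, all values in pages):
 */
#include <stdio.h>

static int spans_boundary(unsigned long start, unsigned long nr_pages,
			  unsigned long boundary_size)
{
	/* offset of the first page within its boundary-sized window */
	unsigned long off = start & (boundary_size - 1);

	return off + nr_pages > boundary_size;
}

int main(void)
{
	/* 64 KiB boundary with 4 KiB pages -> 16-page windows */
	printf("%d\n", spans_boundary(14, 4, 16));  /* 1: pages 14..17 cross */
	printf("%d\n", spans_boundary(16, 4, 16));  /* 0: fits in next window */
	return 0;
}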
1265 | |||
1266 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | ||
1267 | struct dma_ops_domain *dom, | ||
1268 | unsigned int pages, | ||
1269 | unsigned long align_mask, | ||
1270 | u64 dma_mask) | ||
1271 | { | ||
1272 | unsigned long address; | ||
1273 | |||
1274 | #ifdef CONFIG_IOMMU_STRESS | ||
1275 | dom->next_address = 0; | ||
1276 | dom->need_flush = true; | ||
1277 | #endif | ||
1278 | |||
1279 | address = dma_ops_area_alloc(dev, dom, pages, align_mask, | ||
1280 | dma_mask, dom->next_address); | ||
1281 | |||
1282 | if (address == -1) { | ||
1283 | dom->next_address = 0; | ||
1284 | address = dma_ops_area_alloc(dev, dom, pages, align_mask, | ||
1285 | dma_mask, 0); | ||
1286 | dom->need_flush = true; | ||
1287 | } | ||
1288 | |||
1289 | if (unlikely(address == -1)) | ||
1290 | address = DMA_ERROR_CODE; | ||
1291 | |||
1292 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); | ||
1293 | |||
1294 | return address; | ||
1295 | } | ||
1296 | |||
1297 | /* | ||
1298 | * The address free function. | ||
1299 | * | ||
1300 | * called with domain->lock held | ||
1301 | */ | ||
1302 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, | ||
1303 | unsigned long address, | ||
1304 | unsigned int pages) | ||
1305 | { | ||
1306 | unsigned i = address >> APERTURE_RANGE_SHIFT; | ||
1307 | struct aperture_range *range = dom->aperture[i]; | ||
1308 | |||
1309 | BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); | ||
1310 | |||
1311 | #ifdef CONFIG_IOMMU_STRESS | ||
1312 | if (i < 4) | ||
1313 | return; | ||
1314 | #endif | ||
1315 | |||
1316 | if (address >= dom->next_address) | ||
1317 | dom->need_flush = true; | ||
1318 | |||
1319 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; | ||
1320 | |||
1321 | bitmap_clear(range->bitmap, address, pages); | ||
1322 | |||
1323 | } | ||
1324 | |||
1325 | /**************************************************************************** | ||
1326 | * | ||
1327 | * The next functions belong to the domain allocation. A domain is | ||
1328 | * allocated for every IOMMU as the default domain. If device isolation | ||
1329 | * is enabled, every device gets its own domain. The most important thing | ||
1330 | * about domains is the page table mapping the DMA address space they | ||
1331 | * contain. | ||
1332 | * | ||
1333 | ****************************************************************************/ | ||
1334 | |||
1335 | /* | ||
1336 | * This function adds a protection domain to the global protection domain list | ||
1337 | */ | ||
1338 | static void add_domain_to_list(struct protection_domain *domain) | ||
1339 | { | ||
1340 | unsigned long flags; | ||
1341 | |||
1342 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1343 | list_add(&domain->list, &amd_iommu_pd_list); | ||
1344 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1345 | } | ||
1346 | |||
1347 | /* | ||
1348 | * This function removes a protection domain from the global | ||
1349 | * protection domain list | ||
1350 | */ | ||
1351 | static void del_domain_from_list(struct protection_domain *domain) | ||
1352 | { | ||
1353 | unsigned long flags; | ||
1354 | |||
1355 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1356 | list_del(&domain->list); | ||
1357 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1358 | } | ||
1359 | |||
1360 | static u16 domain_id_alloc(void) | ||
1361 | { | ||
1362 | unsigned long flags; | ||
1363 | int id; | ||
1364 | |||
1365 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1366 | id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); | ||
1367 | BUG_ON(id == 0); | ||
1368 | if (id > 0 && id < MAX_DOMAIN_ID) | ||
1369 | __set_bit(id, amd_iommu_pd_alloc_bitmap); | ||
1370 | else | ||
1371 | id = 0; | ||
1372 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1373 | |||
1374 | return id; | ||
1375 | } | ||
1376 | |||
1377 | static void domain_id_free(int id) | ||
1378 | { | ||
1379 | unsigned long flags; | ||
1380 | |||
1381 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1382 | if (id > 0 && id < MAX_DOMAIN_ID) | ||
1383 | __clear_bit(id, amd_iommu_pd_alloc_bitmap); | ||
1384 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1385 | } | ||
1386 | |||
1387 | static void free_pagetable(struct protection_domain *domain) | ||
1388 | { | ||
1389 | int i, j; | ||
1390 | u64 *p1, *p2, *p3; | ||
1391 | |||
1392 | p1 = domain->pt_root; | ||
1393 | |||
1394 | if (!p1) | ||
1395 | return; | ||
1396 | |||
1397 | for (i = 0; i < 512; ++i) { | ||
1398 | if (!IOMMU_PTE_PRESENT(p1[i])) | ||
1399 | continue; | ||
1400 | |||
1401 | p2 = IOMMU_PTE_PAGE(p1[i]); | ||
1402 | for (j = 0; j < 512; ++j) { | ||
1403 | if (!IOMMU_PTE_PRESENT(p2[j])) | ||
1404 | continue; | ||
1405 | p3 = IOMMU_PTE_PAGE(p2[j]); | ||
1406 | free_page((unsigned long)p3); | ||
1407 | } | ||
1408 | |||
1409 | free_page((unsigned long)p2); | ||
1410 | } | ||
1411 | |||
1412 | free_page((unsigned long)p1); | ||
1413 | |||
1414 | domain->pt_root = NULL; | ||
1415 | } | ||
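/*
 * [Editor's sketch, not driver code.] free_pagetable() above walks a
 * fixed-depth table (512 entries per level), frees every present lower-level
 * page, then frees the level itself. The same walk-and-free pattern in a
 * minimal user-space model, with NULL standing in for "not present":
 */
#include <stdlib.h>

#define ENTRIES 512

static void free_two_level(void ***root)
{
	int i, j;

	if (!root)
		return;

	for (i = 0; i < ENTRIES; i++) {
		void **l2 = root[i];
		if (!l2)
			continue;           /* entry not present */
		for (j = 0; j < ENTRIES; j++)
			free(l2[j]);        /* free leaf pages (NULL is a no-op) */
		free(l2);                   /* free the level-2 table itself */
	}
	free(root);                         /* finally free the root level */
}

int main(void)
{
	void ***root = calloc(ENTRIES, sizeof(*root));

	root[0] = calloc(ENTRIES, sizeof(void *));
	root[0][0] = malloc(64);
	free_two_level(root);
	return 0;
}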
1416 | |||
1417 | /* | ||
1418 | * Free a domain, only used if something went wrong in the | ||
1419 | * allocation path and we need to free an already allocated page table | ||
1420 | */ | ||
1421 | static void dma_ops_domain_free(struct dma_ops_domain *dom) | ||
1422 | { | ||
1423 | int i; | ||
1424 | |||
1425 | if (!dom) | ||
1426 | return; | ||
1427 | |||
1428 | del_domain_from_list(&dom->domain); | ||
1429 | |||
1430 | free_pagetable(&dom->domain); | ||
1431 | |||
1432 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { | ||
1433 | if (!dom->aperture[i]) | ||
1434 | continue; | ||
1435 | free_page((unsigned long)dom->aperture[i]->bitmap); | ||
1436 | kfree(dom->aperture[i]); | ||
1437 | } | ||
1438 | |||
1439 | kfree(dom); | ||
1440 | } | ||
1441 | |||
1442 | /* | ||
1443 | * Allocates a new protection domain usable for the dma_ops functions. | ||
1444 | * It also initializes the page table and the address allocator data | ||
1445 | * structures required for the dma_ops interface | ||
1446 | */ | ||
1447 | static struct dma_ops_domain *dma_ops_domain_alloc(void) | ||
1448 | { | ||
1449 | struct dma_ops_domain *dma_dom; | ||
1450 | |||
1451 | dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); | ||
1452 | if (!dma_dom) | ||
1453 | return NULL; | ||
1454 | |||
1455 | spin_lock_init(&dma_dom->domain.lock); | ||
1456 | |||
1457 | dma_dom->domain.id = domain_id_alloc(); | ||
1458 | if (dma_dom->domain.id == 0) | ||
1459 | goto free_dma_dom; | ||
1460 | INIT_LIST_HEAD(&dma_dom->domain.dev_list); | ||
1461 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; | ||
1462 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | ||
1463 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | ||
1464 | dma_dom->domain.priv = dma_dom; | ||
1465 | if (!dma_dom->domain.pt_root) | ||
1466 | goto free_dma_dom; | ||
1467 | |||
1468 | dma_dom->need_flush = false; | ||
1469 | dma_dom->target_dev = 0xffff; | ||
1470 | |||
1471 | add_domain_to_list(&dma_dom->domain); | ||
1472 | |||
1473 | if (alloc_new_range(dma_dom, true, GFP_KERNEL)) | ||
1474 | goto free_dma_dom; | ||
1475 | |||
1476 | /* | ||
1477 | * mark the first page as allocated so we never return 0 as | ||
1478 | * a valid dma-address and can therefore use 0 as the error value | ||
1479 | */ | ||
1480 | dma_dom->aperture[0]->bitmap[0] = 1; | ||
1481 | dma_dom->next_address = 0; | ||
1482 | |||
1483 | |||
1484 | return dma_dom; | ||
1485 | |||
1486 | free_dma_dom: | ||
1487 | dma_ops_domain_free(dma_dom); | ||
1488 | |||
1489 | return NULL; | ||
1490 | } | ||
1491 | |||
1492 | /* | ||
1493 | * little helper function to check whether a given protection domain is a | ||
1494 | * dma_ops domain | ||
1495 | */ | ||
1496 | static bool dma_ops_domain(struct protection_domain *domain) | ||
1497 | { | ||
1498 | return domain->flags & PD_DMA_OPS_MASK; | ||
1499 | } | ||
1500 | |||
1501 | static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) | ||
1502 | { | ||
1503 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1504 | u32 flags = 0; | ||
1505 | |||
1506 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1507 | << DEV_ENTRY_MODE_SHIFT; | ||
1508 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1509 | |||
1510 | if (ats) | ||
1511 | flags |= DTE_FLAG_IOTLB; | ||
1512 | |||
1513 | amd_iommu_dev_table[devid].data[3] |= flags; | ||
1514 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1515 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1516 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1517 | } | ||
1518 | |||
1519 | static void clear_dte_entry(u16 devid) | ||
1520 | { | ||
1521 | /* remove entry from the device table seen by the hardware */ | ||
1522 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1523 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1524 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1525 | |||
1526 | amd_iommu_apply_erratum_63(devid); | ||
1527 | } | ||
1528 | |||
1529 | static void do_attach(struct device *dev, struct protection_domain *domain) | ||
1530 | { | ||
1531 | struct iommu_dev_data *dev_data; | ||
1532 | struct amd_iommu *iommu; | ||
1533 | struct pci_dev *pdev; | ||
1534 | bool ats = false; | ||
1535 | u16 devid; | ||
1536 | |||
1537 | devid = get_device_id(dev); | ||
1538 | iommu = amd_iommu_rlookup_table[devid]; | ||
1539 | dev_data = get_dev_data(dev); | ||
1540 | pdev = to_pci_dev(dev); | ||
1541 | |||
1542 | if (amd_iommu_iotlb_sup) | ||
1543 | ats = pci_ats_enabled(pdev); | ||
1544 | |||
1545 | /* Update data structures */ | ||
1546 | dev_data->domain = domain; | ||
1547 | list_add(&dev_data->list, &domain->dev_list); | ||
1548 | set_dte_entry(devid, domain, ats); | ||
1549 | |||
1550 | /* Do reference counting */ | ||
1551 | domain->dev_iommu[iommu->index] += 1; | ||
1552 | domain->dev_cnt += 1; | ||
1553 | |||
1554 | /* Flush the DTE entry */ | ||
1555 | device_flush_dte(dev); | ||
1556 | } | ||
1557 | |||
1558 | static void do_detach(struct device *dev) | ||
1559 | { | ||
1560 | struct iommu_dev_data *dev_data; | ||
1561 | struct amd_iommu *iommu; | ||
1562 | u16 devid; | ||
1563 | |||
1564 | devid = get_device_id(dev); | ||
1565 | iommu = amd_iommu_rlookup_table[devid]; | ||
1566 | dev_data = get_dev_data(dev); | ||
1567 | |||
1568 | /* decrease reference counters */ | ||
1569 | dev_data->domain->dev_iommu[iommu->index] -= 1; | ||
1570 | dev_data->domain->dev_cnt -= 1; | ||
1571 | |||
1572 | /* Update data structures */ | ||
1573 | dev_data->domain = NULL; | ||
1574 | list_del(&dev_data->list); | ||
1575 | clear_dte_entry(devid); | ||
1576 | |||
1577 | /* Flush the DTE entry */ | ||
1578 | device_flush_dte(dev); | ||
1579 | } | ||
1580 | |||
1581 | /* | ||
1582 | * If a device is not yet associated with a domain, this function | ||
1583 | * attaches it to the domain and makes the assignment visible to the hardware | ||
1584 | */ | ||
1585 | static int __attach_device(struct device *dev, | ||
1586 | struct protection_domain *domain) | ||
1587 | { | ||
1588 | struct iommu_dev_data *dev_data, *alias_data; | ||
1589 | int ret; | ||
1590 | |||
1591 | dev_data = get_dev_data(dev); | ||
1592 | alias_data = get_dev_data(dev_data->alias); | ||
1593 | |||
1594 | if (!alias_data) | ||
1595 | return -EINVAL; | ||
1596 | |||
1597 | /* lock domain */ | ||
1598 | spin_lock(&domain->lock); | ||
1599 | |||
1600 | /* Some sanity checks */ | ||
1601 | ret = -EBUSY; | ||
1602 | if (alias_data->domain != NULL && | ||
1603 | alias_data->domain != domain) | ||
1604 | goto out_unlock; | ||
1605 | |||
1606 | if (dev_data->domain != NULL && | ||
1607 | dev_data->domain != domain) | ||
1608 | goto out_unlock; | ||
1609 | |||
1610 | /* Do real assignment */ | ||
1611 | if (dev_data->alias != dev) { | ||
1612 | alias_data = get_dev_data(dev_data->alias); | ||
1613 | if (alias_data->domain == NULL) | ||
1614 | do_attach(dev_data->alias, domain); | ||
1615 | |||
1616 | atomic_inc(&alias_data->bind); | ||
1617 | } | ||
1618 | |||
1619 | if (dev_data->domain == NULL) | ||
1620 | do_attach(dev, domain); | ||
1621 | |||
1622 | atomic_inc(&dev_data->bind); | ||
1623 | |||
1624 | ret = 0; | ||
1625 | |||
1626 | out_unlock: | ||
1627 | |||
1628 | /* ready */ | ||
1629 | spin_unlock(&domain->lock); | ||
1630 | |||
1631 | return ret; | ||
1632 | } | ||
1633 | |||
1634 | /* | ||
1635 | * If a device is not yet associated with a domain, this function | ||
1636 | * attaches it to the domain and makes the assignment visible to the hardware | ||
1637 | */ | ||
1638 | static int attach_device(struct device *dev, | ||
1639 | struct protection_domain *domain) | ||
1640 | { | ||
1641 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1642 | unsigned long flags; | ||
1643 | int ret; | ||
1644 | |||
1645 | if (amd_iommu_iotlb_sup) | ||
1646 | pci_enable_ats(pdev, PAGE_SHIFT); | ||
1647 | |||
1648 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1649 | ret = __attach_device(dev, domain); | ||
1650 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1651 | |||
1652 | /* | ||
1653 | * We might boot into a crash-kernel here. The crashed kernel | ||
1654 | * left the caches in the IOMMU dirty. So we have to flush | ||
1655 | * here to evict all dirty stuff. | ||
1656 | */ | ||
1657 | domain_flush_tlb_pde(domain); | ||
1658 | |||
1659 | return ret; | ||
1660 | } | ||
1661 | |||
1662 | /* | ||
1663 | * Removes a device from a protection domain (unlocked) | ||
1664 | */ | ||
1665 | static void __detach_device(struct device *dev) | ||
1666 | { | ||
1667 | struct iommu_dev_data *dev_data = get_dev_data(dev); | ||
1668 | struct iommu_dev_data *alias_data; | ||
1669 | struct protection_domain *domain; | ||
1670 | unsigned long flags; | ||
1671 | |||
1672 | BUG_ON(!dev_data->domain); | ||
1673 | |||
1674 | domain = dev_data->domain; | ||
1675 | |||
1676 | spin_lock_irqsave(&domain->lock, flags); | ||
1677 | |||
1678 | if (dev_data->alias != dev) { | ||
1679 | alias_data = get_dev_data(dev_data->alias); | ||
1680 | if (atomic_dec_and_test(&alias_data->bind)) | ||
1681 | do_detach(dev_data->alias); | ||
1682 | } | ||
1683 | |||
1684 | if (atomic_dec_and_test(&dev_data->bind)) | ||
1685 | do_detach(dev); | ||
1686 | |||
1687 | spin_unlock_irqrestore(&domain->lock, flags); | ||
1688 | |||
1689 | /* | ||
1690 | * If we run in passthrough mode the device must be assigned to the | ||
1691 | * passthrough domain if it is detached from any other domain. | ||
1692 | * Make sure we can deassign from the pt_domain itself. | ||
1693 | */ | ||
1694 | if (iommu_pass_through && | ||
1695 | (dev_data->domain == NULL && domain != pt_domain)) | ||
1696 | __attach_device(dev, pt_domain); | ||
1697 | } | ||
1698 | |||
1699 | /* | ||
1700 | * Removes a device from a protection domain (with devtable_lock held) | ||
1701 | */ | ||
1702 | static void detach_device(struct device *dev) | ||
1703 | { | ||
1704 | struct pci_dev *pdev = to_pci_dev(dev); | ||
1705 | unsigned long flags; | ||
1706 | |||
1707 | /* lock device table */ | ||
1708 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1709 | __detach_device(dev); | ||
1710 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1711 | |||
1712 | if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) | ||
1713 | pci_disable_ats(pdev); | ||
1714 | } | ||
1715 | |||
1716 | /* | ||
1717 | * Find out the protection domain structure for a given PCI device. This | ||
1718 | * will give us the pointer to the page table root for example. | ||
1719 | */ | ||
1720 | static struct protection_domain *domain_for_device(struct device *dev) | ||
1721 | { | ||
1722 | struct protection_domain *dom; | ||
1723 | struct iommu_dev_data *dev_data, *alias_data; | ||
1724 | unsigned long flags; | ||
1725 | u16 devid; | ||
1726 | |||
1727 | devid = get_device_id(dev); | ||
1728 | dev_data = get_dev_data(dev); | ||
1729 | alias_data = get_dev_data(dev_data->alias); | ||
1730 | if (!alias_data) | ||
1731 | return NULL; | ||
1732 | |||
1733 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1734 | dom = dev_data->domain; | ||
1735 | if (dom == NULL && | ||
1736 | alias_data->domain != NULL) { | ||
1737 | __attach_device(dev, alias_data->domain); | ||
1738 | dom = alias_data->domain; | ||
1739 | } | ||
1740 | |||
1741 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1742 | |||
1743 | return dom; | ||
1744 | } | ||
1745 | |||
1746 | static int device_change_notifier(struct notifier_block *nb, | ||
1747 | unsigned long action, void *data) | ||
1748 | { | ||
1749 | struct device *dev = data; | ||
1750 | u16 devid; | ||
1751 | struct protection_domain *domain; | ||
1752 | struct dma_ops_domain *dma_domain; | ||
1753 | struct amd_iommu *iommu; | ||
1754 | unsigned long flags; | ||
1755 | |||
1756 | if (!check_device(dev)) | ||
1757 | return 0; | ||
1758 | |||
1759 | devid = get_device_id(dev); | ||
1760 | iommu = amd_iommu_rlookup_table[devid]; | ||
1761 | |||
1762 | switch (action) { | ||
1763 | case BUS_NOTIFY_UNBOUND_DRIVER: | ||
1764 | |||
1765 | domain = domain_for_device(dev); | ||
1766 | |||
1767 | if (!domain) | ||
1768 | goto out; | ||
1769 | if (iommu_pass_through) | ||
1770 | break; | ||
1771 | detach_device(dev); | ||
1772 | break; | ||
1773 | case BUS_NOTIFY_ADD_DEVICE: | ||
1774 | |||
1775 | iommu_init_device(dev); | ||
1776 | |||
1777 | domain = domain_for_device(dev); | ||
1778 | |||
1779 | /* allocate a protection domain if a device is added */ | ||
1780 | dma_domain = find_protection_domain(devid); | ||
1781 | if (dma_domain) | ||
1782 | goto out; | ||
1783 | dma_domain = dma_ops_domain_alloc(); | ||
1784 | if (!dma_domain) | ||
1785 | goto out; | ||
1786 | dma_domain->target_dev = devid; | ||
1787 | |||
1788 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1789 | list_add_tail(&dma_domain->list, &iommu_pd_list); | ||
1790 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1791 | |||
1792 | break; | ||
1793 | case BUS_NOTIFY_DEL_DEVICE: | ||
1794 | |||
1795 | iommu_uninit_device(dev); | ||
1796 | |||
1797 | default: | ||
1798 | goto out; | ||
1799 | } | ||
1800 | |||
1801 | device_flush_dte(dev); | ||
1802 | iommu_completion_wait(iommu); | ||
1803 | |||
1804 | out: | ||
1805 | return 0; | ||
1806 | } | ||
1807 | |||
1808 | static struct notifier_block device_nb = { | ||
1809 | .notifier_call = device_change_notifier, | ||
1810 | }; | ||
1811 | |||
1812 | void amd_iommu_init_notifier(void) | ||
1813 | { | ||
1814 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
1815 | } | ||
1816 | |||
1817 | /***************************************************************************** | ||
1818 | * | ||
1819 | * The next functions belong to the dma_ops mapping/unmapping code. | ||
1820 | * | ||
1821 | *****************************************************************************/ | ||
1822 | |||
1823 | /* | ||
1824 | * In the dma_ops path we only have the struct device. This function | ||
1825 | * finds the corresponding IOMMU, the protection domain and the | ||
1826 | * requestor id for a given device. | ||
1827 | * If the device is not yet associated with a domain this is also done | ||
1828 | * in this function. | ||
1829 | */ | ||
1830 | static struct protection_domain *get_domain(struct device *dev) | ||
1831 | { | ||
1832 | struct protection_domain *domain; | ||
1833 | struct dma_ops_domain *dma_dom; | ||
1834 | u16 devid = get_device_id(dev); | ||
1835 | |||
1836 | if (!check_device(dev)) | ||
1837 | return ERR_PTR(-EINVAL); | ||
1838 | |||
1839 | domain = domain_for_device(dev); | ||
1840 | if (domain != NULL && !dma_ops_domain(domain)) | ||
1841 | return ERR_PTR(-EBUSY); | ||
1842 | |||
1843 | if (domain != NULL) | ||
1844 | return domain; | ||
1845 | |||
1846 | /* Device not bound yet - bind it */ | ||
1847 | dma_dom = find_protection_domain(devid); | ||
1848 | if (!dma_dom) | ||
1849 | dma_dom = amd_iommu_rlookup_table[devid]->default_dom; | ||
1850 | attach_device(dev, &dma_dom->domain); | ||
1851 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1852 | dma_dom->domain.id, dev_name(dev)); | ||
1853 | |||
1854 | return &dma_dom->domain; | ||
1855 | } | ||
1856 | |||
1857 | static void update_device_table(struct protection_domain *domain) | ||
1858 | { | ||
1859 | struct iommu_dev_data *dev_data; | ||
1860 | |||
1861 | list_for_each_entry(dev_data, &domain->dev_list, list) { | ||
1862 | struct pci_dev *pdev = to_pci_dev(dev_data->dev); | ||
1863 | u16 devid = get_device_id(dev_data->dev); | ||
1864 | set_dte_entry(devid, domain, pci_ats_enabled(pdev)); | ||
1865 | } | ||
1866 | } | ||
1867 | |||
1868 | static void update_domain(struct protection_domain *domain) | ||
1869 | { | ||
1870 | if (!domain->updated) | ||
1871 | return; | ||
1872 | |||
1873 | update_device_table(domain); | ||
1874 | |||
1875 | domain_flush_devices(domain); | ||
1876 | domain_flush_tlb_pde(domain); | ||
1877 | |||
1878 | domain->updated = false; | ||
1879 | } | ||
1880 | |||
1881 | /* | ||
1882 | * This function fetches the PTE for a given address in the aperture | ||
1883 | */ | ||
1884 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | ||
1885 | unsigned long address) | ||
1886 | { | ||
1887 | struct aperture_range *aperture; | ||
1888 | u64 *pte, *pte_page; | ||
1889 | |||
1890 | aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; | ||
1891 | if (!aperture) | ||
1892 | return NULL; | ||
1893 | |||
1894 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | ||
1895 | if (!pte) { | ||
1896 | pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, | ||
1897 | GFP_ATOMIC); | ||
1898 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | ||
1899 | } else | ||
1900 | pte += PM_LEVEL_INDEX(0, address); | ||
1901 | |||
1902 | update_domain(&dom->domain); | ||
1903 | |||
1904 | return pte; | ||
1905 | } | ||
1906 | |||
1907 | /* | ||
1908 | * This is the generic map function. It maps one 4kb page at paddr to | ||
1909 | * the given address in the DMA address space for the domain. | ||
1910 | */ | ||
1911 | static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, | ||
1912 | unsigned long address, | ||
1913 | phys_addr_t paddr, | ||
1914 | int direction) | ||
1915 | { | ||
1916 | u64 *pte, __pte; | ||
1917 | |||
1918 | WARN_ON(address > dom->aperture_size); | ||
1919 | |||
1920 | paddr &= PAGE_MASK; | ||
1921 | |||
1922 | pte = dma_ops_get_pte(dom, address); | ||
1923 | if (!pte) | ||
1924 | return DMA_ERROR_CODE; | ||
1925 | |||
1926 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
1927 | |||
1928 | if (direction == DMA_TO_DEVICE) | ||
1929 | __pte |= IOMMU_PTE_IR; | ||
1930 | else if (direction == DMA_FROM_DEVICE) | ||
1931 | __pte |= IOMMU_PTE_IW; | ||
1932 | else if (direction == DMA_BIDIRECTIONAL) | ||
1933 | __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; | ||
1934 | |||
1935 | WARN_ON(*pte); | ||
1936 | |||
1937 | *pte = __pte; | ||
1938 | |||
1939 | return (dma_addr_t)address; | ||
1940 | } | ||
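/*
 * [Editor's sketch, not driver code.] dma_ops_domain_map() above derives the
 * PTE permission bits from the DMA direction: DMA_TO_DEVICE only needs the
 * device to read, DMA_FROM_DEVICE only to write, bidirectional needs both.
 * A standalone model with hypothetical bit values standing in for
 * IOMMU_PTE_IR/IOMMU_PTE_IW:
 */
#include <stdio.h>

enum dir { TO_DEVICE, FROM_DEVICE, BIDIRECTIONAL };

#define PTE_READ  (1u << 0)   /* device may read  */
#define PTE_WRITE (1u << 1)   /* device may write */

static unsigned prot_for_direction(enum dir d)
{
	switch (d) {
	case TO_DEVICE:     return PTE_READ;
	case FROM_DEVICE:   return PTE_WRITE;
	case BIDIRECTIONAL: return PTE_READ | PTE_WRITE;
	}
	return 0;
}

int main(void)
{
	printf("%u %u %u\n", prot_for_direction(TO_DEVICE),
	       prot_for_direction(FROM_DEVICE),
	       prot_for_direction(BIDIRECTIONAL));   /* 1 2 3 */
	return 0;
}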
1941 | |||
1942 | /* | ||
1943 | * The generic unmapping function for one page in the DMA address space. | ||
1944 | */ | ||
1945 | static void dma_ops_domain_unmap(struct dma_ops_domain *dom, | ||
1946 | unsigned long address) | ||
1947 | { | ||
1948 | struct aperture_range *aperture; | ||
1949 | u64 *pte; | ||
1950 | |||
1951 | if (address >= dom->aperture_size) | ||
1952 | return; | ||
1953 | |||
1954 | aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; | ||
1955 | if (!aperture) | ||
1956 | return; | ||
1957 | |||
1958 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | ||
1959 | if (!pte) | ||
1960 | return; | ||
1961 | |||
1962 | pte += PM_LEVEL_INDEX(0, address); | ||
1963 | |||
1964 | WARN_ON(!*pte); | ||
1965 | |||
1966 | *pte = 0ULL; | ||
1967 | } | ||
1968 | |||
1969 | /* | ||
1970 | * This function contains common code for mapping of a physically | ||
1971 | * contiguous memory region into DMA address space. It is used by all | ||
1972 | * mapping functions provided with this IOMMU driver. | ||
1973 | * Must be called with the domain lock held. | ||
1974 | */ | ||
1975 | static dma_addr_t __map_single(struct device *dev, | ||
1976 | struct dma_ops_domain *dma_dom, | ||
1977 | phys_addr_t paddr, | ||
1978 | size_t size, | ||
1979 | int dir, | ||
1980 | bool align, | ||
1981 | u64 dma_mask) | ||
1982 | { | ||
1983 | dma_addr_t offset = paddr & ~PAGE_MASK; | ||
1984 | dma_addr_t address, start, ret; | ||
1985 | unsigned int pages; | ||
1986 | unsigned long align_mask = 0; | ||
1987 | int i; | ||
1988 | |||
1989 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
1990 | paddr &= PAGE_MASK; | ||
1991 | |||
1992 | INC_STATS_COUNTER(total_map_requests); | ||
1993 | |||
1994 | if (pages > 1) | ||
1995 | INC_STATS_COUNTER(cross_page); | ||
1996 | |||
1997 | if (align) | ||
1998 | align_mask = (1UL << get_order(size)) - 1; | ||
1999 | |||
2000 | retry: | ||
2001 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | ||
2002 | dma_mask); | ||
2003 | if (unlikely(address == DMA_ERROR_CODE)) { | ||
2004 | /* | ||
2005 | * setting next_address here will let the address | ||
2006 | * allocator only scan the newly allocated range in the | ||
2007 | * first run. This is a small optimization. | ||
2008 | */ | ||
2009 | dma_dom->next_address = dma_dom->aperture_size; | ||
2010 | |||
2011 | if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) | ||
2012 | goto out; | ||
2013 | |||
2014 | /* | ||
2015 | * aperture was successfully enlarged by 128 MB, try | ||
2016 | * allocation again | ||
2017 | */ | ||
2018 | goto retry; | ||
2019 | } | ||
2020 | |||
2021 | start = address; | ||
2022 | for (i = 0; i < pages; ++i) { | ||
2023 | ret = dma_ops_domain_map(dma_dom, start, paddr, dir); | ||
2024 | if (ret == DMA_ERROR_CODE) | ||
2025 | goto out_unmap; | ||
2026 | |||
2027 | paddr += PAGE_SIZE; | ||
2028 | start += PAGE_SIZE; | ||
2029 | } | ||
2030 | address += offset; | ||
2031 | |||
2032 | ADD_STATS_COUNTER(alloced_io_mem, size); | ||
2033 | |||
2034 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | ||
2035 | domain_flush_tlb(&dma_dom->domain); | ||
2036 | dma_dom->need_flush = false; | ||
2037 | } else if (unlikely(amd_iommu_np_cache)) | ||
2038 | domain_flush_pages(&dma_dom->domain, address, size); | ||
2039 | |||
2040 | out: | ||
2041 | return address; | ||
2042 | |||
2043 | out_unmap: | ||
2044 | |||
2045 | for (--i; i >= 0; --i) { | ||
2046 | start -= PAGE_SIZE; | ||
2047 | dma_ops_domain_unmap(dma_dom, start); | ||
2048 | } | ||
2049 | |||
2050 | dma_ops_free_addresses(dma_dom, address, pages); | ||
2051 | |||
2052 | return DMA_ERROR_CODE; | ||
2053 | } | ||
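/*
 * [Editor's sketch, not driver code.] __map_single() above maps a region
 * that need not be page aligned: it keeps the sub-page offset, maps whole
 * pages and adds the offset back to the returned DMA address. The arithmetic
 * in isolation, assuming 4 KiB pages and made-up example values:
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long paddr  = 0x12345678, size = 0x2000;
	unsigned long offset = paddr & ~PAGE_MASK;                       /* 0x678 */
	unsigned long pages  = (offset + size + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned long base   = 0x100000;            /* hypothetical allocator result */

	/* 0x2000 bytes starting at in-page offset 0x678 span 3 pages */
	printf("offset=%#lx pages=%lu dma=%#lx\n", offset, pages, base + offset);
	return 0;
}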
2054 | |||
2055 | /* | ||
2056 | * Does the reverse of the __map_single function. Must be called with | ||
2057 | * the domain lock held too | ||
2058 | */ | ||
2059 | static void __unmap_single(struct dma_ops_domain *dma_dom, | ||
2060 | dma_addr_t dma_addr, | ||
2061 | size_t size, | ||
2062 | int dir) | ||
2063 | { | ||
2064 | dma_addr_t flush_addr; | ||
2065 | dma_addr_t i, start; | ||
2066 | unsigned int pages; | ||
2067 | |||
2068 | if ((dma_addr == DMA_ERROR_CODE) || | ||
2069 | (dma_addr + size > dma_dom->aperture_size)) | ||
2070 | return; | ||
2071 | |||
2072 | flush_addr = dma_addr; | ||
2073 | pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); | ||
2074 | dma_addr &= PAGE_MASK; | ||
2075 | start = dma_addr; | ||
2076 | |||
2077 | for (i = 0; i < pages; ++i) { | ||
2078 | dma_ops_domain_unmap(dma_dom, start); | ||
2079 | start += PAGE_SIZE; | ||
2080 | } | ||
2081 | |||
2082 | SUB_STATS_COUNTER(alloced_io_mem, size); | ||
2083 | |||
2084 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | ||
2085 | |||
2086 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | ||
2087 | domain_flush_pages(&dma_dom->domain, flush_addr, size); | ||
2088 | dma_dom->need_flush = false; | ||
2089 | } | ||
2090 | } | ||
2091 | |||
2092 | /* | ||
2093 | * The exported map_single function for dma_ops. | ||
2094 | */ | ||
2095 | static dma_addr_t map_page(struct device *dev, struct page *page, | ||
2096 | unsigned long offset, size_t size, | ||
2097 | enum dma_data_direction dir, | ||
2098 | struct dma_attrs *attrs) | ||
2099 | { | ||
2100 | unsigned long flags; | ||
2101 | struct protection_domain *domain; | ||
2102 | dma_addr_t addr; | ||
2103 | u64 dma_mask; | ||
2104 | phys_addr_t paddr = page_to_phys(page) + offset; | ||
2105 | |||
2106 | INC_STATS_COUNTER(cnt_map_single); | ||
2107 | |||
2108 | domain = get_domain(dev); | ||
2109 | if (PTR_ERR(domain) == -EINVAL) | ||
2110 | return (dma_addr_t)paddr; | ||
2111 | else if (IS_ERR(domain)) | ||
2112 | return DMA_ERROR_CODE; | ||
2113 | |||
2114 | dma_mask = *dev->dma_mask; | ||
2115 | |||
2116 | spin_lock_irqsave(&domain->lock, flags); | ||
2117 | |||
2118 | addr = __map_single(dev, domain->priv, paddr, size, dir, false, | ||
2119 | dma_mask); | ||
2120 | if (addr == DMA_ERROR_CODE) | ||
2121 | goto out; | ||
2122 | |||
2123 | domain_flush_complete(domain); | ||
2124 | |||
2125 | out: | ||
2126 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2127 | |||
2128 | return addr; | ||
2129 | } | ||
2130 | |||
2131 | /* | ||
2132 | * The exported unmap_single function for dma_ops. | ||
2133 | */ | ||
2134 | static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | ||
2135 | enum dma_data_direction dir, struct dma_attrs *attrs) | ||
2136 | { | ||
2137 | unsigned long flags; | ||
2138 | struct protection_domain *domain; | ||
2139 | |||
2140 | INC_STATS_COUNTER(cnt_unmap_single); | ||
2141 | |||
2142 | domain = get_domain(dev); | ||
2143 | if (IS_ERR(domain)) | ||
2144 | return; | ||
2145 | |||
2146 | spin_lock_irqsave(&domain->lock, flags); | ||
2147 | |||
2148 | __unmap_single(domain->priv, dma_addr, size, dir); | ||
2149 | |||
2150 | domain_flush_complete(domain); | ||
2151 | |||
2152 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2153 | } | ||
2154 | |||
2155 | /* | ||
2156 | * This is a special map_sg function which is used if we have to map a | ||
2157 | * device that is not handled by an AMD IOMMU in the system. | ||
2158 | */ | ||
2159 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | ||
2160 | int nelems, int dir) | ||
2161 | { | ||
2162 | struct scatterlist *s; | ||
2163 | int i; | ||
2164 | |||
2165 | for_each_sg(sglist, s, nelems, i) { | ||
2166 | s->dma_address = (dma_addr_t)sg_phys(s); | ||
2167 | s->dma_length = s->length; | ||
2168 | } | ||
2169 | |||
2170 | return nelems; | ||
2171 | } | ||
2172 | |||
2173 | /* | ||
2174 | * The exported map_sg function for dma_ops (handles scatter-gather | ||
2175 | * lists). | ||
2176 | */ | ||
2177 | static int map_sg(struct device *dev, struct scatterlist *sglist, | ||
2178 | int nelems, enum dma_data_direction dir, | ||
2179 | struct dma_attrs *attrs) | ||
2180 | { | ||
2181 | unsigned long flags; | ||
2182 | struct protection_domain *domain; | ||
2183 | int i; | ||
2184 | struct scatterlist *s; | ||
2185 | phys_addr_t paddr; | ||
2186 | int mapped_elems = 0; | ||
2187 | u64 dma_mask; | ||
2188 | |||
2189 | INC_STATS_COUNTER(cnt_map_sg); | ||
2190 | |||
2191 | domain = get_domain(dev); | ||
2192 | if (PTR_ERR(domain) == -EINVAL) | ||
2193 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
2194 | else if (IS_ERR(domain)) | ||
2195 | return 0; | ||
2196 | |||
2197 | dma_mask = *dev->dma_mask; | ||
2198 | |||
2199 | spin_lock_irqsave(&domain->lock, flags); | ||
2200 | |||
2201 | for_each_sg(sglist, s, nelems, i) { | ||
2202 | paddr = sg_phys(s); | ||
2203 | |||
2204 | s->dma_address = __map_single(dev, domain->priv, | ||
2205 | paddr, s->length, dir, false, | ||
2206 | dma_mask); | ||
2207 | |||
2208 | if (s->dma_address) { | ||
2209 | s->dma_length = s->length; | ||
2210 | mapped_elems++; | ||
2211 | } else | ||
2212 | goto unmap; | ||
2213 | } | ||
2214 | |||
2215 | domain_flush_complete(domain); | ||
2216 | |||
2217 | out: | ||
2218 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2219 | |||
2220 | return mapped_elems; | ||
2221 | unmap: | ||
2222 | for_each_sg(sglist, s, mapped_elems, i) { | ||
2223 | if (s->dma_address) | ||
2224 | __unmap_single(domain->priv, s->dma_address, | ||
2225 | s->dma_length, dir); | ||
2226 | s->dma_address = s->dma_length = 0; | ||
2227 | } | ||
2228 | |||
2229 | mapped_elems = 0; | ||
2230 | |||
2231 | goto out; | ||
2232 | } | ||
2233 | |||
2234 | /* | ||
2235 | * The exported unmap_sg function for dma_ops (handles scatter-gather | ||
2236 | * lists). | ||
2237 | */ | ||
2238 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, | ||
2239 | int nelems, enum dma_data_direction dir, | ||
2240 | struct dma_attrs *attrs) | ||
2241 | { | ||
2242 | unsigned long flags; | ||
2243 | struct protection_domain *domain; | ||
2244 | struct scatterlist *s; | ||
2245 | int i; | ||
2246 | |||
2247 | INC_STATS_COUNTER(cnt_unmap_sg); | ||
2248 | |||
2249 | domain = get_domain(dev); | ||
2250 | if (IS_ERR(domain)) | ||
2251 | return; | ||
2252 | |||
2253 | spin_lock_irqsave(&domain->lock, flags); | ||
2254 | |||
2255 | for_each_sg(sglist, s, nelems, i) { | ||
2256 | __unmap_single(domain->priv, s->dma_address, | ||
2257 | s->dma_length, dir); | ||
2258 | s->dma_address = s->dma_length = 0; | ||
2259 | } | ||
2260 | |||
2261 | domain_flush_complete(domain); | ||
2262 | |||
2263 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2264 | } | ||
2265 | |||
2266 | /* | ||
2267 | * The exported alloc_coherent function for dma_ops. | ||
2268 | */ | ||
2269 | static void *alloc_coherent(struct device *dev, size_t size, | ||
2270 | dma_addr_t *dma_addr, gfp_t flag) | ||
2271 | { | ||
2272 | unsigned long flags; | ||
2273 | void *virt_addr; | ||
2274 | struct protection_domain *domain; | ||
2275 | phys_addr_t paddr; | ||
2276 | u64 dma_mask = dev->coherent_dma_mask; | ||
2277 | |||
2278 | INC_STATS_COUNTER(cnt_alloc_coherent); | ||
2279 | |||
2280 | domain = get_domain(dev); | ||
2281 | if (PTR_ERR(domain) == -EINVAL) { | ||
2282 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | ||
2283 | *dma_addr = __pa(virt_addr); | ||
2284 | return virt_addr; | ||
2285 | } else if (IS_ERR(domain)) | ||
2286 | return NULL; | ||
2287 | |||
2288 | dma_mask = dev->coherent_dma_mask; | ||
2289 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
2290 | flag |= __GFP_ZERO; | ||
2291 | |||
2292 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | ||
2293 | if (!virt_addr) | ||
2294 | return NULL; | ||
2295 | |||
2296 | paddr = virt_to_phys(virt_addr); | ||
2297 | |||
2298 | if (!dma_mask) | ||
2299 | dma_mask = *dev->dma_mask; | ||
2300 | |||
2301 | spin_lock_irqsave(&domain->lock, flags); | ||
2302 | |||
2303 | *dma_addr = __map_single(dev, domain->priv, paddr, | ||
2304 | size, DMA_BIDIRECTIONAL, true, dma_mask); | ||
2305 | |||
2306 | if (*dma_addr == DMA_ERROR_CODE) { | ||
2307 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2308 | goto out_free; | ||
2309 | } | ||
2310 | |||
2311 | domain_flush_complete(domain); | ||
2312 | |||
2313 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2314 | |||
2315 | return virt_addr; | ||
2316 | |||
2317 | out_free: | ||
2318 | |||
2319 | free_pages((unsigned long)virt_addr, get_order(size)); | ||
2320 | |||
2321 | return NULL; | ||
2322 | } | ||
2323 | |||
2324 | /* | ||
2325 | * The exported free_coherent function for dma_ops. | ||
2326 | */ | ||
2327 | static void free_coherent(struct device *dev, size_t size, | ||
2328 | void *virt_addr, dma_addr_t dma_addr) | ||
2329 | { | ||
2330 | unsigned long flags; | ||
2331 | struct protection_domain *domain; | ||
2332 | |||
2333 | INC_STATS_COUNTER(cnt_free_coherent); | ||
2334 | |||
2335 | domain = get_domain(dev); | ||
2336 | if (IS_ERR(domain)) | ||
2337 | goto free_mem; | ||
2338 | |||
2339 | spin_lock_irqsave(&domain->lock, flags); | ||
2340 | |||
2341 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | ||
2342 | |||
2343 | domain_flush_complete(domain); | ||
2344 | |||
2345 | spin_unlock_irqrestore(&domain->lock, flags); | ||
2346 | |||
2347 | free_mem: | ||
2348 | free_pages((unsigned long)virt_addr, get_order(size)); | ||
2349 | } | ||
2350 | |||
2351 | /* | ||
2352 | * This function is called by the DMA layer to find out if we can handle a | ||
2353 | * particular device. It is part of the dma_ops. | ||
2354 | */ | ||
2355 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | ||
2356 | { | ||
2357 | return check_device(dev); | ||
2358 | } | ||
2359 | |||
2360 | /* | ||
2361 | * The function for pre-allocating protection domains. | ||
2362 | * | ||
2363 | * Once the driver core informs the DMA layer when a driver grabs a device, | ||
2364 | * we won't need to preallocate the protection domains anymore. | ||
2365 | * For now we have to. | ||
2366 | */ | ||
2367 | static void prealloc_protection_domains(void) | ||
2368 | { | ||
2369 | struct pci_dev *dev = NULL; | ||
2370 | struct dma_ops_domain *dma_dom; | ||
2371 | u16 devid; | ||
2372 | |||
2373 | for_each_pci_dev(dev) { | ||
2374 | |||
2375 | /* Do we handle this device? */ | ||
2376 | if (!check_device(&dev->dev)) | ||
2377 | continue; | ||
2378 | |||
2379 | /* Is there already any domain for it? */ | ||
2380 | if (domain_for_device(&dev->dev)) | ||
2381 | continue; | ||
2382 | |||
2383 | devid = get_device_id(&dev->dev); | ||
2384 | |||
2385 | dma_dom = dma_ops_domain_alloc(); | ||
2386 | if (!dma_dom) | ||
2387 | continue; | ||
2388 | init_unity_mappings_for_device(dma_dom, devid); | ||
2389 | dma_dom->target_dev = devid; | ||
2390 | |||
2391 | attach_device(&dev->dev, &dma_dom->domain); | ||
2392 | |||
2393 | list_add_tail(&dma_dom->list, &iommu_pd_list); | ||
2394 | } | ||
2395 | } | ||
2396 | |||
2397 | static struct dma_map_ops amd_iommu_dma_ops = { | ||
2398 | .alloc_coherent = alloc_coherent, | ||
2399 | .free_coherent = free_coherent, | ||
2400 | .map_page = map_page, | ||
2401 | .unmap_page = unmap_page, | ||
2402 | .map_sg = map_sg, | ||
2403 | .unmap_sg = unmap_sg, | ||
2404 | .dma_supported = amd_iommu_dma_supported, | ||
2405 | }; | ||
2406 | |||
2407 | static unsigned device_dma_ops_init(void) | ||
2408 | { | ||
2409 | struct pci_dev *pdev = NULL; | ||
2410 | unsigned unhandled = 0; | ||
2411 | |||
2412 | for_each_pci_dev(pdev) { | ||
2413 | if (!check_device(&pdev->dev)) { | ||
2414 | unhandled += 1; | ||
2415 | continue; | ||
2416 | } | ||
2417 | |||
2418 | pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; | ||
2419 | } | ||
2420 | |||
2421 | return unhandled; | ||
2422 | } | ||
2423 | |||
2424 | /* | ||
2425 | * The function which clues the AMD IOMMU driver into dma_ops. | ||
2426 | */ | ||
2427 | |||
2428 | void __init amd_iommu_init_api(void) | ||
2429 | { | ||
2430 | register_iommu(&amd_iommu_ops); | ||
2431 | } | ||
2432 | |||
2433 | int __init amd_iommu_init_dma_ops(void) | ||
2434 | { | ||
2435 | struct amd_iommu *iommu; | ||
2436 | int ret, unhandled; | ||
2437 | |||
2438 | /* | ||
2439 | * first allocate a default protection domain for every IOMMU we | ||
2440 | * found in the system. Devices not assigned to any other | ||
2441 | * protection domain will be assigned to the default one. | ||
2442 | */ | ||
2443 | for_each_iommu(iommu) { | ||
2444 | iommu->default_dom = dma_ops_domain_alloc(); | ||
2445 | if (iommu->default_dom == NULL) | ||
2446 | return -ENOMEM; | ||
2447 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | ||
2448 | ret = iommu_init_unity_mappings(iommu); | ||
2449 | if (ret) | ||
2450 | goto free_domains; | ||
2451 | } | ||
2452 | |||
2453 | /* | ||
2454 | * Pre-allocate the protection domains for each device. | ||
2455 | */ | ||
2456 | prealloc_protection_domains(); | ||
2457 | |||
2458 | iommu_detected = 1; | ||
2459 | swiotlb = 0; | ||
2460 | |||
2461 | /* Finally make our dma_ops visible to the device drivers */ | ||
2462 | unhandled = device_dma_ops_init(); | ||
2463 | if (unhandled && max_pfn > MAX_DMA32_PFN) { | ||
2464 | /* There are unhandled devices - initialize swiotlb for them */ | ||
2465 | swiotlb = 1; | ||
2466 | } | ||
2467 | |||
2468 | amd_iommu_stats_init(); | ||
2469 | |||
2470 | return 0; | ||
2471 | |||
2472 | free_domains: | ||
2473 | |||
2474 | for_each_iommu(iommu) { | ||
2475 | if (iommu->default_dom) | ||
2476 | dma_ops_domain_free(iommu->default_dom); | ||
2477 | } | ||
2478 | |||
2479 | return ret; | ||
2480 | } | ||
2481 | |||
2482 | /***************************************************************************** | ||
2483 | * | ||
2484 | * The following functions belong to the exported interface of AMD IOMMU | ||
2485 | * | ||
2486 | * This interface allows access to lower level functions of the IOMMU | ||
2487 | * like protection domain handling and assignment of devices to domains | ||
2488 | * which is not possible with the dma_ops interface. | ||
2489 | * | ||
2490 | *****************************************************************************/ | ||
2491 | |||
2492 | static void cleanup_domain(struct protection_domain *domain) | ||
2493 | { | ||
2494 | struct iommu_dev_data *dev_data, *next; | ||
2495 | unsigned long flags; | ||
2496 | |||
2497 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
2498 | |||
2499 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { | ||
2500 | struct device *dev = dev_data->dev; | ||
2501 | |||
2502 | __detach_device(dev); | ||
2503 | atomic_set(&dev_data->bind, 0); | ||
2504 | } | ||
2505 | |||
2506 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
2507 | } | ||
2508 | |||
2509 | static void protection_domain_free(struct protection_domain *domain) | ||
2510 | { | ||
2511 | if (!domain) | ||
2512 | return; | ||
2513 | |||
2514 | del_domain_from_list(domain); | ||
2515 | |||
2516 | if (domain->id) | ||
2517 | domain_id_free(domain->id); | ||
2518 | |||
2519 | kfree(domain); | ||
2520 | } | ||
2521 | |||
2522 | static struct protection_domain *protection_domain_alloc(void) | ||
2523 | { | ||
2524 | struct protection_domain *domain; | ||
2525 | |||
2526 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | ||
2527 | if (!domain) | ||
2528 | return NULL; | ||
2529 | |||
2530 | spin_lock_init(&domain->lock); | ||
2531 | mutex_init(&domain->api_lock); | ||
2532 | domain->id = domain_id_alloc(); | ||
2533 | if (!domain->id) | ||
2534 | goto out_err; | ||
2535 | INIT_LIST_HEAD(&domain->dev_list); | ||
2536 | |||
2537 | add_domain_to_list(domain); | ||
2538 | |||
2539 | return domain; | ||
2540 | |||
2541 | out_err: | ||
2542 | kfree(domain); | ||
2543 | |||
2544 | return NULL; | ||
2545 | } | ||
2546 | |||
2547 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
2548 | { | ||
2549 | struct protection_domain *domain; | ||
2550 | |||
2551 | domain = protection_domain_alloc(); | ||
2552 | if (!domain) | ||
2553 | goto out_free; | ||
2554 | |||
2555 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2556 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | ||
2557 | if (!domain->pt_root) | ||
2558 | goto out_free; | ||
2559 | |||
2560 | dom->priv = domain; | ||
2561 | |||
2562 | return 0; | ||
2563 | |||
2564 | out_free: | ||
2565 | protection_domain_free(domain); | ||
2566 | |||
2567 | return -ENOMEM; | ||
2568 | } | ||
2569 | |||
2570 | static void amd_iommu_domain_destroy(struct iommu_domain *dom) | ||
2571 | { | ||
2572 | struct protection_domain *domain = dom->priv; | ||
2573 | |||
2574 | if (!domain) | ||
2575 | return; | ||
2576 | |||
2577 | if (domain->dev_cnt > 0) | ||
2578 | cleanup_domain(domain); | ||
2579 | |||
2580 | BUG_ON(domain->dev_cnt != 0); | ||
2581 | |||
2582 | free_pagetable(domain); | ||
2583 | |||
2584 | protection_domain_free(domain); | ||
2585 | |||
2586 | dom->priv = NULL; | ||
2587 | } | ||
2588 | |||
2589 | static void amd_iommu_detach_device(struct iommu_domain *dom, | ||
2590 | struct device *dev) | ||
2591 | { | ||
2592 | struct iommu_dev_data *dev_data = dev->archdata.iommu; | ||
2593 | struct amd_iommu *iommu; | ||
2594 | u16 devid; | ||
2595 | |||
2596 | if (!check_device(dev)) | ||
2597 | return; | ||
2598 | |||
2599 | devid = get_device_id(dev); | ||
2600 | |||
2601 | if (dev_data->domain != NULL) | ||
2602 | detach_device(dev); | ||
2603 | |||
2604 | iommu = amd_iommu_rlookup_table[devid]; | ||
2605 | if (!iommu) | ||
2606 | return; | ||
2607 | |||
2608 | device_flush_dte(dev); | ||
2609 | iommu_completion_wait(iommu); | ||
2610 | } | ||
2611 | |||
2612 | static int amd_iommu_attach_device(struct iommu_domain *dom, | ||
2613 | struct device *dev) | ||
2614 | { | ||
2615 | struct protection_domain *domain = dom->priv; | ||
2616 | struct iommu_dev_data *dev_data; | ||
2617 | struct amd_iommu *iommu; | ||
2618 | int ret; | ||
2619 | u16 devid; | ||
2620 | |||
2621 | if (!check_device(dev)) | ||
2622 | return -EINVAL; | ||
2623 | |||
2624 | dev_data = dev->archdata.iommu; | ||
2625 | |||
2626 | devid = get_device_id(dev); | ||
2627 | |||
2628 | iommu = amd_iommu_rlookup_table[devid]; | ||
2629 | if (!iommu) | ||
2630 | return -EINVAL; | ||
2631 | |||
2632 | if (dev_data->domain) | ||
2633 | detach_device(dev); | ||
2634 | |||
2635 | ret = attach_device(dev, domain); | ||
2636 | |||
2637 | iommu_completion_wait(iommu); | ||
2638 | |||
2639 | return ret; | ||
2640 | } | ||
2641 | |||
2642 | static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, | ||
2643 | phys_addr_t paddr, int gfp_order, int iommu_prot) | ||
2644 | { | ||
2645 | unsigned long page_size = 0x1000UL << gfp_order; | ||
2646 | struct protection_domain *domain = dom->priv; | ||
2647 | int prot = 0; | ||
2648 | int ret; | ||
2649 | |||
2650 | if (iommu_prot & IOMMU_READ) | ||
2651 | prot |= IOMMU_PROT_IR; | ||
2652 | if (iommu_prot & IOMMU_WRITE) | ||
2653 | prot |= IOMMU_PROT_IW; | ||
2654 | |||
2655 | mutex_lock(&domain->api_lock); | ||
2656 | ret = iommu_map_page(domain, iova, paddr, prot, page_size); | ||
2657 | mutex_unlock(&domain->api_lock); | ||
2658 | |||
2659 | return ret; | ||
2660 | } | ||
2661 | |||
2662 | static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, | ||
2663 | int gfp_order) | ||
2664 | { | ||
2665 | struct protection_domain *domain = dom->priv; | ||
2666 | unsigned long page_size, unmap_size; | ||
2667 | |||
2668 | page_size = 0x1000UL << gfp_order; | ||
2669 | |||
2670 | mutex_lock(&domain->api_lock); | ||
2671 | unmap_size = iommu_unmap_page(domain, iova, page_size); | ||
2672 | mutex_unlock(&domain->api_lock); | ||
2673 | |||
2674 | domain_flush_tlb_pde(domain); | ||
2675 | |||
2676 | return get_order(unmap_size); | ||
2677 | } | ||
2678 | |||
2679 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | ||
2680 | unsigned long iova) | ||
2681 | { | ||
2682 | struct protection_domain *domain = dom->priv; | ||
2683 | unsigned long offset_mask; | ||
2684 | phys_addr_t paddr; | ||
2685 | u64 *pte, __pte; | ||
2686 | |||
2687 | pte = fetch_pte(domain, iova); | ||
2688 | |||
2689 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | ||
2690 | return 0; | ||
2691 | |||
2692 | if (PM_PTE_LEVEL(*pte) == 0) | ||
2693 | offset_mask = PAGE_SIZE - 1; | ||
2694 | else | ||
2695 | offset_mask = PTE_PAGE_SIZE(*pte) - 1; | ||
2696 | |||
2697 | __pte = *pte & PM_ADDR_MASK; | ||
2698 | paddr = (__pte & ~offset_mask) | (iova & offset_mask); | ||
2699 | |||
2700 | return paddr; | ||
2701 | } | ||
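/*
 * [Editor's sketch, not driver code.] amd_iommu_iova_to_phys() above combines
 * the page frame taken from the PTE with the in-page offset taken from the
 * IOVA; for a large page the offset mask simply covers more low-order bits.
 * Illustrated with hypothetical values and a 2 MiB page:
 */
#include <stdio.h>

int main(void)
{
	unsigned long long pte_addr    = 0x40000000ULL;     /* frame from the PTE */
	unsigned long long iova        = 0x80123456ULL;
	unsigned long long offset_mask = (1ULL << 21) - 1;  /* 2 MiB page */

	unsigned long long paddr = (pte_addr & ~offset_mask) |
				   (iova & offset_mask);

	printf("paddr=%#llx\n", paddr);   /* 0x40123456 */
	return 0;
}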
2702 | |||
2703 | static int amd_iommu_domain_has_cap(struct iommu_domain *domain, | ||
2704 | unsigned long cap) | ||
2705 | { | ||
2706 | switch (cap) { | ||
2707 | case IOMMU_CAP_CACHE_COHERENCY: | ||
2708 | return 1; | ||
2709 | } | ||
2710 | |||
2711 | return 0; | ||
2712 | } | ||
2713 | |||
2714 | static struct iommu_ops amd_iommu_ops = { | ||
2715 | .domain_init = amd_iommu_domain_init, | ||
2716 | .domain_destroy = amd_iommu_domain_destroy, | ||
2717 | .attach_dev = amd_iommu_attach_device, | ||
2718 | .detach_dev = amd_iommu_detach_device, | ||
2719 | .map = amd_iommu_map, | ||
2720 | .unmap = amd_iommu_unmap, | ||
2721 | .iova_to_phys = amd_iommu_iova_to_phys, | ||
2722 | .domain_has_cap = amd_iommu_domain_has_cap, | ||
2723 | }; | ||
2724 | |||
2725 | /***************************************************************************** | ||
2726 | * | ||
2727 | * The next functions do a basic initialization of the IOMMU for pass through | ||
2728 | * mode | ||
2729 | * | ||
2730 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
2731 | * DMA-API translation. | ||
2732 | * | ||
2733 | *****************************************************************************/ | ||
2734 | |||
2735 | int __init amd_iommu_init_passthrough(void) | ||
2736 | { | ||
2737 | struct amd_iommu *iommu; | ||
2738 | struct pci_dev *dev = NULL; | ||
2739 | u16 devid; | ||
2740 | |||
2741 | /* allocate passthrough domain */ | ||
2742 | pt_domain = protection_domain_alloc(); | ||
2743 | if (!pt_domain) | ||
2744 | return -ENOMEM; | ||
2745 | |||
2746 | pt_domain->mode |= PAGE_MODE_NONE; | ||
2747 | |||
2748 | for_each_pci_dev(dev) { | ||
2749 | if (!check_device(&dev->dev)) | ||
2750 | continue; | ||
2751 | |||
2752 | devid = get_device_id(&dev->dev); | ||
2753 | |||
2754 | iommu = amd_iommu_rlookup_table[devid]; | ||
2755 | if (!iommu) | ||
2756 | continue; | ||
2757 | |||
2758 | attach_device(&dev->dev, pt_domain); | ||
2759 | } | ||
2760 | |||
2761 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
2762 | |||
2763 | return 0; | ||
2764 | } | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c deleted file mode 100644 index bfc8453bd98d..000000000000 --- a/arch/x86/kernel/amd_iommu_init.c +++ /dev/null | |||
@@ -1,1572 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. | ||
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | ||
4 | * Leo Duran <leo.duran@amd.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <linux/pci.h> | ||
21 | #include <linux/acpi.h> | ||
22 | #include <linux/list.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/syscore_ops.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/msi.h> | ||
27 | #include <asm/pci-direct.h> | ||
28 | #include <asm/amd_iommu_proto.h> | ||
29 | #include <asm/amd_iommu_types.h> | ||
30 | #include <asm/amd_iommu.h> | ||
31 | #include <asm/iommu.h> | ||
32 | #include <asm/gart.h> | ||
33 | #include <asm/x86_init.h> | ||
34 | #include <asm/iommu_table.h> | ||
35 | /* | ||
36 | * definitions for the ACPI scanning code | ||
37 | */ | ||
38 | #define IVRS_HEADER_LENGTH 48 | ||
39 | |||
40 | #define ACPI_IVHD_TYPE 0x10 | ||
41 | #define ACPI_IVMD_TYPE_ALL 0x20 | ||
42 | #define ACPI_IVMD_TYPE 0x21 | ||
43 | #define ACPI_IVMD_TYPE_RANGE 0x22 | ||
44 | |||
45 | #define IVHD_DEV_ALL 0x01 | ||
46 | #define IVHD_DEV_SELECT 0x02 | ||
47 | #define IVHD_DEV_SELECT_RANGE_START 0x03 | ||
48 | #define IVHD_DEV_RANGE_END 0x04 | ||
49 | #define IVHD_DEV_ALIAS 0x42 | ||
50 | #define IVHD_DEV_ALIAS_RANGE 0x43 | ||
51 | #define IVHD_DEV_EXT_SELECT 0x46 | ||
52 | #define IVHD_DEV_EXT_SELECT_RANGE 0x47 | ||
53 | |||
54 | #define IVHD_FLAG_HT_TUN_EN_MASK 0x01 | ||
55 | #define IVHD_FLAG_PASSPW_EN_MASK 0x02 | ||
56 | #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 | ||
57 | #define IVHD_FLAG_ISOC_EN_MASK 0x08 | ||
58 | |||
59 | #define IVMD_FLAG_EXCL_RANGE 0x08 | ||
60 | #define IVMD_FLAG_UNITY_MAP 0x01 | ||
61 | |||
62 | #define ACPI_DEVFLAG_INITPASS 0x01 | ||
63 | #define ACPI_DEVFLAG_EXTINT 0x02 | ||
64 | #define ACPI_DEVFLAG_NMI 0x04 | ||
65 | #define ACPI_DEVFLAG_SYSMGT1 0x10 | ||
66 | #define ACPI_DEVFLAG_SYSMGT2 0x20 | ||
67 | #define ACPI_DEVFLAG_LINT0 0x40 | ||
68 | #define ACPI_DEVFLAG_LINT1 0x80 | ||
69 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 | ||
70 | |||
71 | /* | ||
72 | * ACPI table definitions | ||
73 | * | ||
74 | * These data structures are laid over the table to parse the important values | ||
75 | * out of it. | ||
76 | */ | ||
77 | |||
78 | /* | ||
79 | * structure describing one IOMMU in the ACPI table. Typically followed by one | ||
80 | * or more ivhd_entry structures. | ||
81 | */ | ||
82 | struct ivhd_header { | ||
83 | u8 type; | ||
84 | u8 flags; | ||
85 | u16 length; | ||
86 | u16 devid; | ||
87 | u16 cap_ptr; | ||
88 | u64 mmio_phys; | ||
89 | u16 pci_seg; | ||
90 | u16 info; | ||
91 | u32 reserved; | ||
92 | } __attribute__((packed)); | ||
93 | |||
94 | /* | ||
95 | * A device entry describing which devices a specific IOMMU translates and | ||
96 | * which requestor ids they use. | ||
97 | */ | ||
98 | struct ivhd_entry { | ||
99 | u8 type; | ||
100 | u16 devid; | ||
101 | u8 flags; | ||
102 | u32 ext; | ||
103 | } __attribute__((packed)); | ||
104 | |||
105 | /* | ||
106 | * An AMD IOMMU memory definition structure. It defines things like exclusion | ||
107 | * ranges for devices and regions that should be unity mapped. | ||
108 | */ | ||
109 | struct ivmd_header { | ||
110 | u8 type; | ||
111 | u8 flags; | ||
112 | u16 length; | ||
113 | u16 devid; | ||
114 | u16 aux; | ||
115 | u64 resv; | ||
116 | u64 range_start; | ||
117 | u64 range_length; | ||
118 | } __attribute__((packed)); | ||
119 | |||
120 | bool amd_iommu_dump; | ||
121 | |||
122 | static int __initdata amd_iommu_detected; | ||
123 | static bool __initdata amd_iommu_disabled; | ||
124 | |||
125 | u16 amd_iommu_last_bdf; /* largest PCI device id we have | ||
126 | to handle */ | ||
127 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | ||
128 | we find in ACPI */ | ||
129 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | ||
130 | |||
131 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | ||
132 | system */ | ||
133 | |||
134 | /* Array to assign indices to IOMMUs */ | ||
135 | struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
136 | int amd_iommus_present; | ||
137 | |||
138 | /* IOMMUs have a non-present cache? */ | ||
139 | bool amd_iommu_np_cache __read_mostly; | ||
140 | bool amd_iommu_iotlb_sup __read_mostly = true; | ||
141 | |||
142 | /* | ||
143 | * The ACPI table parsing functions set this variable on an error | ||
144 | */ | ||
145 | static int __initdata amd_iommu_init_err; | ||
146 | |||
147 | /* | ||
148 | * List of protection domains - used during resume | ||
149 | */ | ||
150 | LIST_HEAD(amd_iommu_pd_list); | ||
151 | spinlock_t amd_iommu_pd_lock; | ||
152 | |||
153 | /* | ||
154 | * Pointer to the device table which is shared by all AMD IOMMUs. | ||
155 | * It is indexed by the PCI device id or the HT unit id and contains | ||
156 | * information about the domain the device belongs to as well as the | ||
157 | * page table root pointer. | ||
158 | */ | ||
159 | struct dev_table_entry *amd_iommu_dev_table; | ||
160 | |||
161 | /* | ||
162 | * The alias table is a driver specific data structure which contains the | ||
163 | * mappings of the PCI device ids to the actual requestor ids on the IOMMU. | ||
164 | * More than one device can share the same requestor id. | ||
165 | */ | ||
166 | u16 *amd_iommu_alias_table; | ||
167 | |||
168 | /* | ||
169 | * The rlookup table is used to find the IOMMU which is responsible | ||
170 | * for a specific device. It is also indexed by the PCI device id. | ||
171 | */ | ||
172 | struct amd_iommu **amd_iommu_rlookup_table; | ||
173 | |||
174 | /* | ||
175 | * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap | ||
176 | * to know which ones are already in use. | ||
177 | */ | ||
178 | unsigned long *amd_iommu_pd_alloc_bitmap; | ||
179 | |||
180 | static u32 dev_table_size; /* size of the device table */ | ||
181 | static u32 alias_table_size; /* size of the alias table */ | ||
182 | static u32 rlookup_table_size; /* size of the rlookup table */ | ||
183 | |||
184 | /* | ||
185 | * This function flushes all internal caches of | ||
186 | * the IOMMU used by this driver. | ||
187 | */ | ||
188 | extern void iommu_flush_all_caches(struct amd_iommu *iommu); | ||
189 | |||
190 | static inline void update_last_devid(u16 devid) | ||
191 | { | ||
192 | if (devid > amd_iommu_last_bdf) | ||
193 | amd_iommu_last_bdf = devid; | ||
194 | } | ||
195 | |||
196 | static inline unsigned long tbl_size(int entry_size) | ||
197 | { | ||
198 | unsigned shift = PAGE_SHIFT + | ||
199 | get_order(((int)amd_iommu_last_bdf + 1) * entry_size); | ||
200 | |||
201 | return 1UL << shift; | ||
202 | } | ||
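For illustration, a standalone sketch of the rounding done by tbl_size(): table sizes are rounded up to a power-of-two number of pages. PAGE_SHIFT = 12 and the 32-byte device table entry size are assumptions mirroring the usual kernel values, not definitions taken from this file.

#include <stdio.h>

#define PAGE_SHIFT 12                   /* assumed 4 KiB pages */

/* Power-of-two page order large enough to hold 'size' bytes. */
static int get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        unsigned long last_bdf   = 0xffff;      /* highest device id found */
        unsigned long entry_size = 32;          /* assumed bytes per device table entry */
        unsigned long size;

        size = 1UL << (PAGE_SHIFT + get_order((last_bdf + 1) * entry_size));
        printf("device table size: %lu bytes\n", size); /* 2097152 (2 MiB) */
        return 0;
}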
203 | |||
204 | /* Access to l1 and l2 indexed register spaces */ | ||
205 | |||
206 | static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) | ||
207 | { | ||
208 | u32 val; | ||
209 | |||
210 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
211 | pci_read_config_dword(iommu->dev, 0xfc, &val); | ||
212 | return val; | ||
213 | } | ||
214 | |||
215 | static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) | ||
216 | { | ||
217 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); | ||
218 | pci_write_config_dword(iommu->dev, 0xfc, val); | ||
219 | pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); | ||
220 | } | ||
221 | |||
222 | static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) | ||
223 | { | ||
224 | u32 val; | ||
225 | |||
226 | pci_write_config_dword(iommu->dev, 0xf0, address); | ||
227 | pci_read_config_dword(iommu->dev, 0xf4, &val); | ||
228 | return val; | ||
229 | } | ||
230 | |||
231 | static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) | ||
232 | { | ||
233 | pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); | ||
234 | pci_write_config_dword(iommu->dev, 0xf4, val); | ||
235 | } | ||
236 | |||
237 | /**************************************************************************** | ||
238 | * | ||
239 | * AMD IOMMU MMIO register space handling functions | ||
240 | * | ||
241 | * These functions are used to program the IOMMU device registers in | ||
242 | * MMIO space required by this driver. | ||
243 | * | ||
244 | ****************************************************************************/ | ||
245 | |||
246 | /* | ||
247 | * This function sets the exclusion range in the IOMMU. DMA accesses to the | ||
248 | * exclusion range are passed through untranslated. | ||
249 | */ | ||
250 | static void iommu_set_exclusion_range(struct amd_iommu *iommu) | ||
251 | { | ||
252 | u64 start = iommu->exclusion_start & PAGE_MASK; | ||
253 | u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; | ||
254 | u64 entry; | ||
255 | |||
256 | if (!iommu->exclusion_start) | ||
257 | return; | ||
258 | |||
259 | entry = start | MMIO_EXCL_ENABLE_MASK; | ||
260 | memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, | ||
261 | &entry, sizeof(entry)); | ||
262 | |||
263 | entry = limit; | ||
264 | memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, | ||
265 | &entry, sizeof(entry)); | ||
266 | } | ||
267 | |||
268 | /* Programs the physical address of the device table into the IOMMU hardware */ | ||
269 | static void __init iommu_set_device_table(struct amd_iommu *iommu) | ||
270 | { | ||
271 | u64 entry; | ||
272 | |||
273 | BUG_ON(iommu->mmio_base == NULL); | ||
274 | |||
275 | entry = virt_to_phys(amd_iommu_dev_table); | ||
276 | entry |= (dev_table_size >> 12) - 1; | ||
277 | memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, | ||
278 | &entry, sizeof(entry)); | ||
279 | } | ||
280 | |||
281 | /* Generic functions to enable/disable certain features of the IOMMU. */ | ||
282 | static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | ||
283 | { | ||
284 | u32 ctrl; | ||
285 | |||
286 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | ||
287 | ctrl |= (1 << bit); | ||
288 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | ||
289 | } | ||
290 | |||
291 | static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | ||
292 | { | ||
293 | u32 ctrl; | ||
294 | |||
295 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | ||
296 | ctrl &= ~(1 << bit); | ||
297 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | ||
298 | } | ||
299 | |||
300 | /* Function to enable the hardware */ | ||
301 | static void iommu_enable(struct amd_iommu *iommu) | ||
302 | { | ||
303 | static const char * const feat_str[] = { | ||
304 | "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", | ||
305 | "IA", "GA", "HE", "PC", NULL | ||
306 | }; | ||
307 | int i; | ||
308 | |||
309 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", | ||
310 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | ||
311 | |||
312 | if (iommu->cap & (1 << IOMMU_CAP_EFR)) { | ||
313 | printk(KERN_CONT " extended features: "); | ||
314 | for (i = 0; feat_str[i]; ++i) | ||
315 | if (iommu_feature(iommu, (1ULL << i))) | ||
316 | printk(KERN_CONT " %s", feat_str[i]); | ||
317 | } | ||
318 | printk(KERN_CONT "\n"); | ||
319 | |||
320 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | ||
321 | } | ||
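A minimal userspace sketch of the feature printout in iommu_enable() above: each set bit i of the 64-bit feature register selects feat_str[i]. The register value used here is made up purely for illustration.

#include <stdio.h>

int main(void)
{
        static const char * const feat_str[] = {
                "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
                "IA", "GA", "HE", "PC", NULL
        };
        unsigned long long features = 0x281ULL;         /* made-up sample value */
        int i;

        printf("extended features:");
        for (i = 0; feat_str[i]; i++)
                if (features & (1ULL << i))
                        printf(" %s", feat_str[i]);
        printf("\n");           /* prints: extended features: PreF GA PC */
        return 0;
}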
322 | |||
323 | static void iommu_disable(struct amd_iommu *iommu) | ||
324 | { | ||
325 | /* Disable command buffer */ | ||
326 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
327 | |||
328 | /* Disable event logging and event interrupts */ | ||
329 | iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); | ||
330 | iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); | ||
331 | |||
332 | /* Disable IOMMU hardware itself */ | ||
333 | iommu_feature_disable(iommu, CONTROL_IOMMU_EN); | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | ||
338 | * the system has one. | ||
339 | */ | ||
340 | static u8 * __init iommu_map_mmio_space(u64 address) | ||
341 | { | ||
342 | u8 *ret; | ||
343 | |||
344 | if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { | ||
345 | pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", | ||
346 | address); | ||
347 | pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); | ||
348 | return NULL; | ||
349 | } | ||
350 | |||
351 | ret = ioremap_nocache(address, MMIO_REGION_LENGTH); | ||
352 | if (ret != NULL) | ||
353 | return ret; | ||
354 | |||
355 | release_mem_region(address, MMIO_REGION_LENGTH); | ||
356 | |||
357 | return NULL; | ||
358 | } | ||
359 | |||
360 | static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | ||
361 | { | ||
362 | if (iommu->mmio_base) | ||
363 | iounmap(iommu->mmio_base); | ||
364 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); | ||
365 | } | ||
366 | |||
367 | /**************************************************************************** | ||
368 | * | ||
369 | * The functions below belong to the first pass of AMD IOMMU ACPI table | ||
370 | * parsing. In this pass we try to find out the highest device id this | ||
371 | * code has to handle. Upon this information the size of the shared data | ||
372 | * structures is determined later. | ||
373 | * | ||
374 | ****************************************************************************/ | ||
375 | |||
376 | /* | ||
377 | * This function calculates the length of a given IVHD entry | ||
378 | */ | ||
379 | static inline int ivhd_entry_length(u8 *ivhd) | ||
380 | { | ||
381 | return 0x04 << (*ivhd >> 6); | ||
382 | } | ||
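The encoding behind ivhd_entry_length() stores the entry size in the top two bits of the type byte (0 selects 4-byte entries, 1 selects 8-byte entries, and so on). A small sketch using the IVHD_DEV_* values defined above:

#include <stdio.h>

int main(void)
{
        /* 0x02 = IVHD_DEV_SELECT, 0x42 = IVHD_DEV_ALIAS, 0x46 = IVHD_DEV_EXT_SELECT */
        unsigned char types[] = { 0x02, 0x42, 0x46 };
        int i;

        for (i = 0; i < 3; i++)
                printf("type 0x%02x -> %d byte entry\n",
                       types[i], 0x04 << (types[i] >> 6));
        /* prints 4, 8 and 8 byte entries respectively */
        return 0;
}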
383 | |||
384 | /* | ||
385 | * This function reads the last device id the IOMMU has to handle from the PCI | ||
386 | * capability header for this IOMMU | ||
387 | */ | ||
388 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) | ||
389 | { | ||
390 | u32 cap; | ||
391 | |||
392 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | ||
393 | update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); | ||
394 | |||
395 | return 0; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * After reading the highest device id from the IOMMU PCI capability header | ||
400 | * this function checks whether a higher device id is defined in the ACPI table | ||
401 | */ | ||
402 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | ||
403 | { | ||
404 | u8 *p = (void *)h, *end = (void *)h; | ||
405 | struct ivhd_entry *dev; | ||
406 | |||
407 | p += sizeof(*h); | ||
408 | end += h->length; | ||
409 | |||
410 | find_last_devid_on_pci(PCI_BUS(h->devid), | ||
411 | PCI_SLOT(h->devid), | ||
412 | PCI_FUNC(h->devid), | ||
413 | h->cap_ptr); | ||
414 | |||
415 | while (p < end) { | ||
416 | dev = (struct ivhd_entry *)p; | ||
417 | switch (dev->type) { | ||
418 | case IVHD_DEV_SELECT: | ||
419 | case IVHD_DEV_RANGE_END: | ||
420 | case IVHD_DEV_ALIAS: | ||
421 | case IVHD_DEV_EXT_SELECT: | ||
422 | /* all the above subfield types refer to device ids */ | ||
423 | update_last_devid(dev->devid); | ||
424 | break; | ||
425 | default: | ||
426 | break; | ||
427 | } | ||
428 | p += ivhd_entry_length(p); | ||
429 | } | ||
430 | |||
431 | WARN_ON(p != end); | ||
432 | |||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * Iterate over all IVHD entries in the ACPI table and find the highest device | ||
438 | * id which we need to handle. This is the first of three functions which parse | ||
439 | * the ACPI table. So we check the checksum here. | ||
440 | */ | ||
441 | static int __init find_last_devid_acpi(struct acpi_table_header *table) | ||
442 | { | ||
443 | int i; | ||
444 | u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; | ||
445 | struct ivhd_header *h; | ||
446 | |||
447 | /* | ||
448 | * Validate checksum here so we don't need to do it when | ||
449 | * we actually parse the table | ||
450 | */ | ||
451 | for (i = 0; i < table->length; ++i) | ||
452 | checksum += p[i]; | ||
453 | if (checksum != 0) { | ||
454 | /* ACPI table corrupt */ | ||
455 | amd_iommu_init_err = -ENODEV; | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | p += IVRS_HEADER_LENGTH; | ||
460 | |||
461 | end += table->length; | ||
462 | while (p < end) { | ||
463 | h = (struct ivhd_header *)p; | ||
464 | switch (h->type) { | ||
465 | case ACPI_IVHD_TYPE: | ||
466 | find_last_devid_from_ivhd(h); | ||
467 | break; | ||
468 | default: | ||
469 | break; | ||
470 | } | ||
471 | p += h->length; | ||
472 | } | ||
473 | WARN_ON(p != end); | ||
474 | |||
475 | return 0; | ||
476 | } | ||
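For reference, a self-contained sketch of the checksum rule enforced above: all bytes of the IVRS table, including the checksum byte itself, must sum to zero modulo 256, otherwise the table is treated as corrupt.

#include <stdint.h>
#include <stddef.h>

/* Returns non-zero when all bytes of the table sum to zero modulo 256. */
int ivrs_checksum_ok(const uint8_t *table, size_t length)
{
        uint8_t sum = 0;
        size_t i;

        for (i = 0; i < length; i++)
                sum += table[i];

        return sum == 0;
}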
477 | |||
478 | /**************************************************************************** | ||
479 | * | ||
480 | * The following functions belong to the code path which parses the ACPI table | ||
481 | * the second time. In this ACPI parsing iteration we allocate IOMMU specific | ||
482 | * data structures, initialize the device/alias/rlookup table and also | ||
483 | * basically initialize the hardware. | ||
484 | * | ||
485 | ****************************************************************************/ | ||
486 | |||
487 | /* | ||
488 | * Allocates the command buffer. This buffer is per AMD IOMMU. We can | ||
489 | * write commands to that buffer later and the IOMMU will execute them | ||
490 | * asynchronously | ||
491 | */ | ||
492 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | ||
493 | { | ||
494 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
495 | get_order(CMD_BUFFER_SIZE)); | ||
496 | |||
497 | if (cmd_buf == NULL) | ||
498 | return NULL; | ||
499 | |||
500 | iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; | ||
501 | |||
502 | return cmd_buf; | ||
503 | } | ||
504 | |||
505 | /* | ||
506 | * This function resets the command buffer if the IOMMU stopped fetching | ||
507 | * commands from it. | ||
508 | */ | ||
509 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
510 | { | ||
511 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
512 | |||
513 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
514 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
515 | |||
516 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
517 | } | ||
518 | |||
519 | /* | ||
520 | * This function writes the command buffer address to the hardware and | ||
521 | * enables it. | ||
522 | */ | ||
523 | static void iommu_enable_command_buffer(struct amd_iommu *iommu) | ||
524 | { | ||
525 | u64 entry; | ||
526 | |||
527 | BUG_ON(iommu->cmd_buf == NULL); | ||
528 | |||
529 | entry = (u64)virt_to_phys(iommu->cmd_buf); | ||
530 | entry |= MMIO_CMD_SIZE_512; | ||
531 | |||
532 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | ||
533 | &entry, sizeof(entry)); | ||
534 | |||
535 | amd_iommu_reset_cmd_buffer(iommu); | ||
536 | iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); | ||
537 | } | ||
538 | |||
539 | static void __init free_command_buffer(struct amd_iommu *iommu) | ||
540 | { | ||
541 | free_pages((unsigned long)iommu->cmd_buf, | ||
542 | get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); | ||
543 | } | ||
544 | |||
545 | /* allocates the memory where the IOMMU will log its events to */ | ||
546 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | ||
547 | { | ||
548 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
549 | get_order(EVT_BUFFER_SIZE)); | ||
550 | |||
551 | if (iommu->evt_buf == NULL) | ||
552 | return NULL; | ||
553 | |||
554 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
555 | |||
556 | return iommu->evt_buf; | ||
557 | } | ||
558 | |||
559 | static void iommu_enable_event_buffer(struct amd_iommu *iommu) | ||
560 | { | ||
561 | u64 entry; | ||
562 | |||
563 | BUG_ON(iommu->evt_buf == NULL); | ||
564 | |||
565 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | ||
566 | |||
567 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | ||
568 | &entry, sizeof(entry)); | ||
569 | |||
570 | /* set head and tail to zero manually */ | ||
571 | writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
572 | writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
573 | |||
574 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | ||
575 | } | ||
576 | |||
577 | static void __init free_event_buffer(struct amd_iommu *iommu) | ||
578 | { | ||
579 | free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); | ||
580 | } | ||
581 | |||
582 | /* sets a specific bit in the device table entry. */ | ||
583 | static void set_dev_entry_bit(u16 devid, u8 bit) | ||
584 | { | ||
585 | int i = (bit >> 5) & 0x07; | ||
586 | int _bit = bit & 0x1f; | ||
587 | |||
588 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); | ||
589 | } | ||
590 | |||
591 | static int get_dev_entry_bit(u16 devid, u8 bit) | ||
592 | { | ||
593 | int i = (bit >> 5) & 0x07; | ||
594 | int _bit = bit & 0x1f; | ||
595 | |||
596 | return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; | ||
597 | } | ||
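The bit addressing in set_dev_entry_bit()/get_dev_entry_bit() treats a device table entry as eight 32-bit words: bits 7..5 of the bit number select the word and bits 4..0 the bit within it. A hedged sketch follows; the eight-word layout mirrors struct dev_table_entry and the bit number 0x3e is only an example.

#include <stdint.h>
#include <stdio.h>

struct dev_entry {
        uint32_t data[8];               /* one 256-bit device table entry */
};

static void set_entry_bit(struct dev_entry *e, uint8_t bit)
{
        int i    = (bit >> 5) & 0x07;   /* which 32-bit word      */
        int _bit = bit & 0x1f;          /* which bit in that word */

        e->data[i] |= 1u << _bit;
}

int main(void)
{
        struct dev_entry e = { { 0 } };

        set_entry_bit(&e, 0x3e);        /* example bit number only */
        printf("data[1] = 0x%08x\n", (unsigned int)e.data[1]);  /* prints 0x40000000 */
        return 0;
}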
598 | |||
599 | |||
600 | void amd_iommu_apply_erratum_63(u16 devid) | ||
601 | { | ||
602 | int sysmgt; | ||
603 | |||
604 | sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | | ||
605 | (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); | ||
606 | |||
607 | if (sysmgt == 0x01) | ||
608 | set_dev_entry_bit(devid, DEV_ENTRY_IW); | ||
609 | } | ||
610 | |||
611 | /* Writes the specific IOMMU for a device into the rlookup table */ | ||
612 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | ||
613 | { | ||
614 | amd_iommu_rlookup_table[devid] = iommu; | ||
615 | } | ||
616 | |||
617 | /* | ||
618 | * This function takes the device specific flags read from the ACPI | ||
619 | * table and sets up the device table entry with that information | ||
620 | */ | ||
621 | static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, | ||
622 | u16 devid, u32 flags, u32 ext_flags) | ||
623 | { | ||
624 | if (flags & ACPI_DEVFLAG_INITPASS) | ||
625 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); | ||
626 | if (flags & ACPI_DEVFLAG_EXTINT) | ||
627 | set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); | ||
628 | if (flags & ACPI_DEVFLAG_NMI) | ||
629 | set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); | ||
630 | if (flags & ACPI_DEVFLAG_SYSMGT1) | ||
631 | set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); | ||
632 | if (flags & ACPI_DEVFLAG_SYSMGT2) | ||
633 | set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); | ||
634 | if (flags & ACPI_DEVFLAG_LINT0) | ||
635 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); | ||
636 | if (flags & ACPI_DEVFLAG_LINT1) | ||
637 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); | ||
638 | |||
639 | amd_iommu_apply_erratum_63(devid); | ||
640 | |||
641 | set_iommu_for_device(iommu, devid); | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Reads the device exclusion range from ACPI and initializes the IOMMU | ||
646 | * with it | ||
647 | */ | ||
648 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | ||
649 | { | ||
650 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
651 | |||
652 | if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) | ||
653 | return; | ||
654 | |||
655 | if (iommu) { | ||
656 | /* | ||
657 | * We can only configure exclusion ranges per IOMMU, not | ||
658 | * per device. But we can enable the exclusion range per | ||
659 | * device. This is done here | ||
660 | */ | ||
661 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); | ||
662 | iommu->exclusion_start = m->range_start; | ||
663 | iommu->exclusion_length = m->range_length; | ||
664 | } | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * This function reads some important data from the IOMMU PCI space and | ||
669 | * initializes the driver data structure with it. It reads the hardware | ||
670 | * capabilities and the first/last device entries | ||
671 | */ | ||
672 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | ||
673 | { | ||
674 | int cap_ptr = iommu->cap_ptr; | ||
675 | u32 range, misc, low, high; | ||
676 | int i, j; | ||
677 | |||
678 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, | ||
679 | &iommu->cap); | ||
680 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, | ||
681 | &range); | ||
682 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, | ||
683 | &misc); | ||
684 | |||
685 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), | ||
686 | MMIO_GET_FD(range)); | ||
687 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | ||
688 | MMIO_GET_LD(range)); | ||
689 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | ||
690 | |||
691 | if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) | ||
692 | amd_iommu_iotlb_sup = false; | ||
693 | |||
694 | /* read extended feature bits */ | ||
695 | low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); | ||
696 | high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); | ||
697 | |||
698 | iommu->features = ((u64)high << 32) | low; | ||
699 | |||
700 | if (!is_rd890_iommu(iommu->dev)) | ||
701 | return; | ||
702 | |||
703 | /* | ||
704 | * Some rd890 systems may not be fully reconfigured by the BIOS, so | ||
705 | * it's necessary for us to store this information so it can be | ||
706 | * reprogrammed on resume | ||
707 | */ | ||
708 | |||
709 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
710 | &iommu->stored_addr_lo); | ||
711 | pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
712 | &iommu->stored_addr_hi); | ||
713 | |||
714 | /* Low bit locks writes to configuration space */ | ||
715 | iommu->stored_addr_lo &= ~1; | ||
716 | |||
717 | for (i = 0; i < 6; i++) | ||
718 | for (j = 0; j < 0x12; j++) | ||
719 | iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); | ||
720 | |||
721 | for (i = 0; i < 0x83; i++) | ||
722 | iommu->stored_l2[i] = iommu_read_l2(iommu, i); | ||
723 | } | ||
724 | |||
725 | /* | ||
726 | * Takes a pointer to an AMD IOMMU entry in the ACPI table and | ||
727 | * initializes the hardware and our data structures with it. | ||
728 | */ | ||
729 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | ||
730 | struct ivhd_header *h) | ||
731 | { | ||
732 | u8 *p = (u8 *)h; | ||
733 | u8 *end = p, flags = 0; | ||
734 | u16 devid = 0, devid_start = 0, devid_to = 0; | ||
735 | u32 dev_i, ext_flags = 0; | ||
736 | bool alias = false; | ||
737 | struct ivhd_entry *e; | ||
738 | |||
739 | /* | ||
740 | * First save the recommended feature enable bits from ACPI | ||
741 | */ | ||
742 | iommu->acpi_flags = h->flags; | ||
743 | |||
744 | /* | ||
745 | * Done. Now parse the device entries | ||
746 | */ | ||
747 | p += sizeof(struct ivhd_header); | ||
748 | end += h->length; | ||
749 | |||
750 | |||
751 | while (p < end) { | ||
752 | e = (struct ivhd_entry *)p; | ||
753 | switch (e->type) { | ||
754 | case IVHD_DEV_ALL: | ||
755 | |||
756 | DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" | ||
757 | " last device %02x:%02x.%x flags: %02x\n", | ||
758 | PCI_BUS(iommu->first_device), | ||
759 | PCI_SLOT(iommu->first_device), | ||
760 | PCI_FUNC(iommu->first_device), | ||
761 | PCI_BUS(iommu->last_device), | ||
762 | PCI_SLOT(iommu->last_device), | ||
763 | PCI_FUNC(iommu->last_device), | ||
764 | e->flags); | ||
765 | |||
766 | for (dev_i = iommu->first_device; | ||
767 | dev_i <= iommu->last_device; ++dev_i) | ||
768 | set_dev_entry_from_acpi(iommu, dev_i, | ||
769 | e->flags, 0); | ||
770 | break; | ||
771 | case IVHD_DEV_SELECT: | ||
772 | |||
773 | DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " | ||
774 | "flags: %02x\n", | ||
775 | PCI_BUS(e->devid), | ||
776 | PCI_SLOT(e->devid), | ||
777 | PCI_FUNC(e->devid), | ||
778 | e->flags); | ||
779 | |||
780 | devid = e->devid; | ||
781 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); | ||
782 | break; | ||
783 | case IVHD_DEV_SELECT_RANGE_START: | ||
784 | |||
785 | DUMP_printk(" DEV_SELECT_RANGE_START\t " | ||
786 | "devid: %02x:%02x.%x flags: %02x\n", | ||
787 | PCI_BUS(e->devid), | ||
788 | PCI_SLOT(e->devid), | ||
789 | PCI_FUNC(e->devid), | ||
790 | e->flags); | ||
791 | |||
792 | devid_start = e->devid; | ||
793 | flags = e->flags; | ||
794 | ext_flags = 0; | ||
795 | alias = false; | ||
796 | break; | ||
797 | case IVHD_DEV_ALIAS: | ||
798 | |||
799 | DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " | ||
800 | "flags: %02x devid_to: %02x:%02x.%x\n", | ||
801 | PCI_BUS(e->devid), | ||
802 | PCI_SLOT(e->devid), | ||
803 | PCI_FUNC(e->devid), | ||
804 | e->flags, | ||
805 | PCI_BUS(e->ext >> 8), | ||
806 | PCI_SLOT(e->ext >> 8), | ||
807 | PCI_FUNC(e->ext >> 8)); | ||
808 | |||
809 | devid = e->devid; | ||
810 | devid_to = e->ext >> 8; | ||
811 | set_dev_entry_from_acpi(iommu, devid , e->flags, 0); | ||
812 | set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); | ||
813 | amd_iommu_alias_table[devid] = devid_to; | ||
814 | break; | ||
815 | case IVHD_DEV_ALIAS_RANGE: | ||
816 | |||
817 | DUMP_printk(" DEV_ALIAS_RANGE\t\t " | ||
818 | "devid: %02x:%02x.%x flags: %02x " | ||
819 | "devid_to: %02x:%02x.%x\n", | ||
820 | PCI_BUS(e->devid), | ||
821 | PCI_SLOT(e->devid), | ||
822 | PCI_FUNC(e->devid), | ||
823 | e->flags, | ||
824 | PCI_BUS(e->ext >> 8), | ||
825 | PCI_SLOT(e->ext >> 8), | ||
826 | PCI_FUNC(e->ext >> 8)); | ||
827 | |||
828 | devid_start = e->devid; | ||
829 | flags = e->flags; | ||
830 | devid_to = e->ext >> 8; | ||
831 | ext_flags = 0; | ||
832 | alias = true; | ||
833 | break; | ||
834 | case IVHD_DEV_EXT_SELECT: | ||
835 | |||
836 | DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " | ||
837 | "flags: %02x ext: %08x\n", | ||
838 | PCI_BUS(e->devid), | ||
839 | PCI_SLOT(e->devid), | ||
840 | PCI_FUNC(e->devid), | ||
841 | e->flags, e->ext); | ||
842 | |||
843 | devid = e->devid; | ||
844 | set_dev_entry_from_acpi(iommu, devid, e->flags, | ||
845 | e->ext); | ||
846 | break; | ||
847 | case IVHD_DEV_EXT_SELECT_RANGE: | ||
848 | |||
849 | DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " | ||
850 | "%02x:%02x.%x flags: %02x ext: %08x\n", | ||
851 | PCI_BUS(e->devid), | ||
852 | PCI_SLOT(e->devid), | ||
853 | PCI_FUNC(e->devid), | ||
854 | e->flags, e->ext); | ||
855 | |||
856 | devid_start = e->devid; | ||
857 | flags = e->flags; | ||
858 | ext_flags = e->ext; | ||
859 | alias = false; | ||
860 | break; | ||
861 | case IVHD_DEV_RANGE_END: | ||
862 | |||
863 | DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", | ||
864 | PCI_BUS(e->devid), | ||
865 | PCI_SLOT(e->devid), | ||
866 | PCI_FUNC(e->devid)); | ||
867 | |||
868 | devid = e->devid; | ||
869 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | ||
870 | if (alias) { | ||
871 | amd_iommu_alias_table[dev_i] = devid_to; | ||
872 | set_dev_entry_from_acpi(iommu, | ||
873 | devid_to, flags, ext_flags); | ||
874 | } | ||
875 | set_dev_entry_from_acpi(iommu, dev_i, | ||
876 | flags, ext_flags); | ||
877 | } | ||
878 | break; | ||
879 | default: | ||
880 | break; | ||
881 | } | ||
882 | |||
883 | p += ivhd_entry_length(p); | ||
884 | } | ||
885 | } | ||
886 | |||
887 | /* Initializes the device->iommu mapping for the driver */ | ||
888 | static int __init init_iommu_devices(struct amd_iommu *iommu) | ||
889 | { | ||
890 | u32 i; | ||
891 | |||
892 | for (i = iommu->first_device; i <= iommu->last_device; ++i) | ||
893 | set_iommu_for_device(iommu, i); | ||
894 | |||
895 | return 0; | ||
896 | } | ||
897 | |||
898 | static void __init free_iommu_one(struct amd_iommu *iommu) | ||
899 | { | ||
900 | free_command_buffer(iommu); | ||
901 | free_event_buffer(iommu); | ||
902 | iommu_unmap_mmio_space(iommu); | ||
903 | } | ||
904 | |||
905 | static void __init free_iommu_all(void) | ||
906 | { | ||
907 | struct amd_iommu *iommu, *next; | ||
908 | |||
909 | for_each_iommu_safe(iommu, next) { | ||
910 | list_del(&iommu->list); | ||
911 | free_iommu_one(iommu); | ||
912 | kfree(iommu); | ||
913 | } | ||
914 | } | ||
915 | |||
916 | /* | ||
917 | * This function glues the initialization of one IOMMU | ||
918 | * together and also allocates the command buffer and programs the | ||
919 | * hardware. It does NOT enable the IOMMU. This is done afterwards. | ||
920 | */ | ||
921 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | ||
922 | { | ||
923 | spin_lock_init(&iommu->lock); | ||
924 | |||
925 | /* Add IOMMU to internal data structures */ | ||
926 | list_add_tail(&iommu->list, &amd_iommu_list); | ||
927 | iommu->index = amd_iommus_present++; | ||
928 | |||
929 | if (unlikely(iommu->index >= MAX_IOMMUS)) { | ||
930 | WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); | ||
931 | return -ENOSYS; | ||
932 | } | ||
933 | |||
934 | /* Index is fine - add IOMMU to the array */ | ||
935 | amd_iommus[iommu->index] = iommu; | ||
936 | |||
937 | /* | ||
938 | * Copy data from ACPI table entry to the iommu struct | ||
939 | */ | ||
940 | iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); | ||
941 | if (!iommu->dev) | ||
942 | return 1; | ||
943 | |||
944 | iommu->cap_ptr = h->cap_ptr; | ||
945 | iommu->pci_seg = h->pci_seg; | ||
946 | iommu->mmio_phys = h->mmio_phys; | ||
947 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | ||
948 | if (!iommu->mmio_base) | ||
949 | return -ENOMEM; | ||
950 | |||
951 | iommu->cmd_buf = alloc_command_buffer(iommu); | ||
952 | if (!iommu->cmd_buf) | ||
953 | return -ENOMEM; | ||
954 | |||
955 | iommu->evt_buf = alloc_event_buffer(iommu); | ||
956 | if (!iommu->evt_buf) | ||
957 | return -ENOMEM; | ||
958 | |||
959 | iommu->int_enabled = false; | ||
960 | |||
961 | init_iommu_from_pci(iommu); | ||
962 | init_iommu_from_acpi(iommu, h); | ||
963 | init_iommu_devices(iommu); | ||
964 | |||
965 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) | ||
966 | amd_iommu_np_cache = true; | ||
967 | |||
968 | return pci_enable_device(iommu->dev); | ||
969 | } | ||
970 | |||
971 | /* | ||
972 | * Iterates over all IOMMU entries in the ACPI table, allocates the | ||
973 | * IOMMU structure and initializes it with init_iommu_one() | ||
974 | */ | ||
975 | static int __init init_iommu_all(struct acpi_table_header *table) | ||
976 | { | ||
977 | u8 *p = (u8 *)table, *end = (u8 *)table; | ||
978 | struct ivhd_header *h; | ||
979 | struct amd_iommu *iommu; | ||
980 | int ret; | ||
981 | |||
982 | end += table->length; | ||
983 | p += IVRS_HEADER_LENGTH; | ||
984 | |||
985 | while (p < end) { | ||
986 | h = (struct ivhd_header *)p; | ||
987 | switch (*p) { | ||
988 | case ACPI_IVHD_TYPE: | ||
989 | |||
990 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " | ||
991 | "seg: %d flags: %01x info %04x\n", | ||
992 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | ||
993 | PCI_FUNC(h->devid), h->cap_ptr, | ||
994 | h->pci_seg, h->flags, h->info); | ||
995 | DUMP_printk(" mmio-addr: %016llx\n", | ||
996 | h->mmio_phys); | ||
997 | |||
998 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); | ||
999 | if (iommu == NULL) { | ||
1000 | amd_iommu_init_err = -ENOMEM; | ||
1001 | return 0; | ||
1002 | } | ||
1003 | |||
1004 | ret = init_iommu_one(iommu, h); | ||
1005 | if (ret) { | ||
1006 | amd_iommu_init_err = ret; | ||
1007 | return 0; | ||
1008 | } | ||
1009 | break; | ||
1010 | default: | ||
1011 | break; | ||
1012 | } | ||
1013 | p += h->length; | ||
1014 | |||
1015 | } | ||
1016 | WARN_ON(p != end); | ||
1017 | |||
1018 | return 0; | ||
1019 | } | ||
1020 | |||
1021 | /**************************************************************************** | ||
1022 | * | ||
1023 | * The following functions initialize the MSI interrupts for all IOMMUs | ||
1024 | * in the system. It's a bit challenging because there could be multiple | ||
1025 | * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per | ||
1026 | * pci_dev. | ||
1027 | * | ||
1028 | ****************************************************************************/ | ||
1029 | |||
1030 | static int iommu_setup_msi(struct amd_iommu *iommu) | ||
1031 | { | ||
1032 | int r; | ||
1033 | |||
1034 | if (pci_enable_msi(iommu->dev)) | ||
1035 | return 1; | ||
1036 | |||
1037 | r = request_threaded_irq(iommu->dev->irq, | ||
1038 | amd_iommu_int_handler, | ||
1039 | amd_iommu_int_thread, | ||
1040 | 0, "AMD-Vi", | ||
1041 | iommu->dev); | ||
1042 | |||
1043 | if (r) { | ||
1044 | pci_disable_msi(iommu->dev); | ||
1045 | return 1; | ||
1046 | } | ||
1047 | |||
1048 | iommu->int_enabled = true; | ||
1049 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
1050 | |||
1051 | return 0; | ||
1052 | } | ||
1053 | |||
1054 | static int iommu_init_msi(struct amd_iommu *iommu) | ||
1055 | { | ||
1056 | if (iommu->int_enabled) | ||
1057 | return 0; | ||
1058 | |||
1059 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
1060 | return iommu_setup_msi(iommu); | ||
1061 | |||
1062 | return 1; | ||
1063 | } | ||
1064 | |||
1065 | /**************************************************************************** | ||
1066 | * | ||
1067 | * The next functions belong to the third pass of parsing the ACPI | ||
1068 | * table. In this last pass the memory mapping requirements are | ||
1069 | * gathered (like exclusion and unity mapping ranges). | ||
1070 | * | ||
1071 | ****************************************************************************/ | ||
1072 | |||
1073 | static void __init free_unity_maps(void) | ||
1074 | { | ||
1075 | struct unity_map_entry *entry, *next; | ||
1076 | |||
1077 | list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { | ||
1078 | list_del(&entry->list); | ||
1079 | kfree(entry); | ||
1080 | } | ||
1081 | } | ||
1082 | |||
1083 | /* called when we find an exclusion range definition in ACPI */ | ||
1084 | static int __init init_exclusion_range(struct ivmd_header *m) | ||
1085 | { | ||
1086 | int i; | ||
1087 | |||
1088 | switch (m->type) { | ||
1089 | case ACPI_IVMD_TYPE: | ||
1090 | set_device_exclusion_range(m->devid, m); | ||
1091 | break; | ||
1092 | case ACPI_IVMD_TYPE_ALL: | ||
1093 | for (i = 0; i <= amd_iommu_last_bdf; ++i) | ||
1094 | set_device_exclusion_range(i, m); | ||
1095 | break; | ||
1096 | case ACPI_IVMD_TYPE_RANGE: | ||
1097 | for (i = m->devid; i <= m->aux; ++i) | ||
1098 | set_device_exclusion_range(i, m); | ||
1099 | break; | ||
1100 | default: | ||
1101 | break; | ||
1102 | } | ||
1103 | |||
1104 | return 0; | ||
1105 | } | ||
1106 | |||
1107 | /* called for unity map ACPI definition */ | ||
1108 | static int __init init_unity_map_range(struct ivmd_header *m) | ||
1109 | { | ||
1110 | struct unity_map_entry *e = NULL; | ||
1111 | char *s; | ||
1112 | |||
1113 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
1114 | if (e == NULL) | ||
1115 | return -ENOMEM; | ||
1116 | |||
1117 | switch (m->type) { | ||
1118 | default: | ||
1119 | kfree(e); | ||
1120 | return 0; | ||
1121 | case ACPI_IVMD_TYPE: | ||
1122 | s = "IVMD_TYPE\t\t\t"; | ||
1123 | e->devid_start = e->devid_end = m->devid; | ||
1124 | break; | ||
1125 | case ACPI_IVMD_TYPE_ALL: | ||
1126 | s = "IVMD_TYPE_ALL\t\t"; | ||
1127 | e->devid_start = 0; | ||
1128 | e->devid_end = amd_iommu_last_bdf; | ||
1129 | break; | ||
1130 | case ACPI_IVMD_TYPE_RANGE: | ||
1131 | s = "IVMD_TYPE_RANGE\t\t"; | ||
1132 | e->devid_start = m->devid; | ||
1133 | e->devid_end = m->aux; | ||
1134 | break; | ||
1135 | } | ||
1136 | e->address_start = PAGE_ALIGN(m->range_start); | ||
1137 | e->address_end = e->address_start + PAGE_ALIGN(m->range_length); | ||
1138 | e->prot = m->flags >> 1; | ||
1139 | |||
1140 | DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" | ||
1141 | " range_start: %016llx range_end: %016llx flags: %x\n", s, | ||
1142 | PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), | ||
1143 | PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), | ||
1144 | PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), | ||
1145 | e->address_start, e->address_end, m->flags); | ||
1146 | |||
1147 | list_add_tail(&e->list, &amd_iommu_unity_map); | ||
1148 | |||
1149 | return 0; | ||
1150 | } | ||
1151 | |||
1152 | /* iterates over all memory definitions we find in the ACPI table */ | ||
1153 | static int __init init_memory_definitions(struct acpi_table_header *table) | ||
1154 | { | ||
1155 | u8 *p = (u8 *)table, *end = (u8 *)table; | ||
1156 | struct ivmd_header *m; | ||
1157 | |||
1158 | end += table->length; | ||
1159 | p += IVRS_HEADER_LENGTH; | ||
1160 | |||
1161 | while (p < end) { | ||
1162 | m = (struct ivmd_header *)p; | ||
1163 | if (m->flags & IVMD_FLAG_EXCL_RANGE) | ||
1164 | init_exclusion_range(m); | ||
1165 | else if (m->flags & IVMD_FLAG_UNITY_MAP) | ||
1166 | init_unity_map_range(m); | ||
1167 | |||
1168 | p += m->length; | ||
1169 | } | ||
1170 | |||
1171 | return 0; | ||
1172 | } | ||
1173 | |||
1174 | /* | ||
1175 | * Init the device table so that DMA access is not allowed for any | ||
1176 | * device and all page faults are suppressed | ||
1177 | */ | ||
1178 | static void init_device_table(void) | ||
1179 | { | ||
1180 | u32 devid; | ||
1181 | |||
1182 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | ||
1183 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | ||
1184 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | ||
1185 | } | ||
1186 | } | ||
1187 | |||
1188 | static void iommu_init_flags(struct amd_iommu *iommu) | ||
1189 | { | ||
1190 | iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? | ||
1191 | iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : | ||
1192 | iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); | ||
1193 | |||
1194 | iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? | ||
1195 | iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : | ||
1196 | iommu_feature_disable(iommu, CONTROL_PASSPW_EN); | ||
1197 | |||
1198 | iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? | ||
1199 | iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : | ||
1200 | iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); | ||
1201 | |||
1202 | iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? | ||
1203 | iommu_feature_enable(iommu, CONTROL_ISOC_EN) : | ||
1204 | iommu_feature_disable(iommu, CONTROL_ISOC_EN); | ||
1205 | |||
1206 | /* | ||
1207 | * make IOMMU memory accesses cache coherent | ||
1208 | */ | ||
1209 | iommu_feature_enable(iommu, CONTROL_COHERENT_EN); | ||
1210 | } | ||
1211 | |||
1212 | static void iommu_apply_resume_quirks(struct amd_iommu *iommu) | ||
1213 | { | ||
1214 | int i, j; | ||
1215 | u32 ioc_feature_control; | ||
1216 | struct pci_dev *pdev = NULL; | ||
1217 | |||
1218 | /* RD890 BIOSes may not have completely reconfigured the iommu */ | ||
1219 | if (!is_rd890_iommu(iommu->dev)) | ||
1220 | return; | ||
1221 | |||
1222 | /* | ||
1223 | * First, we need to ensure that the iommu is enabled. This is | ||
1224 | * controlled by a register in the northbridge | ||
1225 | */ | ||
1226 | pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); | ||
1227 | |||
1228 | if (!pdev) | ||
1229 | return; | ||
1230 | |||
1231 | /* Select Northbridge indirect register 0x75 and enable writing */ | ||
1232 | pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); | ||
1233 | pci_read_config_dword(pdev, 0x64, &ioc_feature_control); | ||
1234 | |||
1235 | /* Enable the iommu */ | ||
1236 | if (!(ioc_feature_control & 0x1)) | ||
1237 | pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); | ||
1238 | |||
1239 | pci_dev_put(pdev); | ||
1240 | |||
1241 | /* Restore the iommu BAR */ | ||
1242 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
1243 | iommu->stored_addr_lo); | ||
1244 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, | ||
1245 | iommu->stored_addr_hi); | ||
1246 | |||
1247 | /* Restore the l1 indirect regs for each of the 6 l1s */ | ||
1248 | for (i = 0; i < 6; i++) | ||
1249 | for (j = 0; j < 0x12; j++) | ||
1250 | iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); | ||
1251 | |||
1252 | /* Restore the l2 indirect regs */ | ||
1253 | for (i = 0; i < 0x83; i++) | ||
1254 | iommu_write_l2(iommu, i, iommu->stored_l2[i]); | ||
1255 | |||
1256 | /* Lock PCI setup registers */ | ||
1257 | pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, | ||
1258 | iommu->stored_addr_lo | 1); | ||
1259 | } | ||
1260 | |||
1261 | /* | ||
1262 | * This function finally enables all IOMMUs found in the system after | ||
1263 | * they have been initialized | ||
1264 | */ | ||
1265 | static void enable_iommus(void) | ||
1266 | { | ||
1267 | struct amd_iommu *iommu; | ||
1268 | |||
1269 | for_each_iommu(iommu) { | ||
1270 | iommu_disable(iommu); | ||
1271 | iommu_init_flags(iommu); | ||
1272 | iommu_set_device_table(iommu); | ||
1273 | iommu_enable_command_buffer(iommu); | ||
1274 | iommu_enable_event_buffer(iommu); | ||
1275 | iommu_set_exclusion_range(iommu); | ||
1276 | iommu_init_msi(iommu); | ||
1277 | iommu_enable(iommu); | ||
1278 | iommu_flush_all_caches(iommu); | ||
1279 | } | ||
1280 | } | ||
1281 | |||
1282 | static void disable_iommus(void) | ||
1283 | { | ||
1284 | struct amd_iommu *iommu; | ||
1285 | |||
1286 | for_each_iommu(iommu) | ||
1287 | iommu_disable(iommu); | ||
1288 | } | ||
1289 | |||
1290 | /* | ||
1291 | * Suspend/Resume support | ||
1292 | * The IOMMUs are disabled on suspend and re-enabled with quirks applied on resume | ||
1293 | */ | ||
1294 | |||
1295 | static void amd_iommu_resume(void) | ||
1296 | { | ||
1297 | struct amd_iommu *iommu; | ||
1298 | |||
1299 | for_each_iommu(iommu) | ||
1300 | iommu_apply_resume_quirks(iommu); | ||
1301 | |||
1302 | /* re-load the hardware */ | ||
1303 | enable_iommus(); | ||
1304 | |||
1305 | /* | ||
1306 | * we have to flush after the IOMMUs are enabled because a | ||
1307 | * disabled IOMMU will never execute the commands we send | ||
1308 | */ | ||
1309 | for_each_iommu(iommu) | ||
1310 | iommu_flush_all_caches(iommu); | ||
1311 | } | ||
1312 | |||
1313 | static int amd_iommu_suspend(void) | ||
1314 | { | ||
1315 | /* disable IOMMUs to go out of the way for BIOS */ | ||
1316 | disable_iommus(); | ||
1317 | |||
1318 | return 0; | ||
1319 | } | ||
1320 | |||
1321 | static struct syscore_ops amd_iommu_syscore_ops = { | ||
1322 | .suspend = amd_iommu_suspend, | ||
1323 | .resume = amd_iommu_resume, | ||
1324 | }; | ||
1325 | |||
1326 | /* | ||
1327 | * This is the core init function for AMD IOMMU hardware in the system. | ||
1328 | * This function is called from the generic x86 DMA layer initialization | ||
1329 | * code. | ||
1330 | * | ||
1331 | * This function basically parses the ACPI table for AMD IOMMU (IVRS) | ||
1332 | * three times: | ||
1333 | * | ||
1334 | * 1 pass) Find the highest PCI device id the driver has to handle. | ||
1335 | * Based on this information the sizes of the data structures | ||
1336 | * that need to be allocated are determined. | ||
1337 | * | ||
1338 | * 2 pass) Initialize the data structures just allocated with the | ||
1339 | * information in the ACPI table about available AMD IOMMUs | ||
1340 | * in the system. It also maps the PCI devices in the | ||
1341 | * system to specific IOMMUs | ||
1342 | * | ||
1343 | * 3 pass) After the basic data structures are allocated and | ||
1344 | * initialized we update them with information about memory | ||
1345 | * remapping requirements parsed out of the ACPI table in | ||
1346 | * this last pass. | ||
1347 | * | ||
1348 | * After that the hardware is initialized and ready to go. In the last | ||
1349 | * step we do some Linux specific things like registering the driver in | ||
1350 | * the dma_ops interface and initializing the suspend/resume support | ||
1351 | * functions. Finally it prints some information about AMD IOMMUs and | ||
1352 | * the driver state and enables the hardware. | ||
1353 | */ | ||
1354 | static int __init amd_iommu_init(void) | ||
1355 | { | ||
1356 | int i, ret = 0; | ||
1357 | |||
1358 | /* | ||
1359 | * First parse ACPI tables to find the largest Bus/Dev/Func | ||
1360 | * we need to handle. Based on this information the shared data | ||
1361 | * structures for the IOMMUs in the system will be allocated | ||
1362 | */ | ||
1363 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | ||
1364 | return -ENODEV; | ||
1365 | |||
1366 | ret = amd_iommu_init_err; | ||
1367 | if (ret) | ||
1368 | goto out; | ||
1369 | |||
1370 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); | ||
1371 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); | ||
1372 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); | ||
1373 | |||
1374 | ret = -ENOMEM; | ||
1375 | |||
1376 | /* Device table - directly used by all IOMMUs */ | ||
1377 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
1378 | get_order(dev_table_size)); | ||
1379 | if (amd_iommu_dev_table == NULL) | ||
1380 | goto out; | ||
1381 | |||
1382 | /* | ||
1383 | * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the | ||
1384 | * IOMMU sees for that device | ||
1385 | */ | ||
1386 | amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, | ||
1387 | get_order(alias_table_size)); | ||
1388 | if (amd_iommu_alias_table == NULL) | ||
1389 | goto free; | ||
1390 | |||
1391 | /* IOMMU rlookup table - find the IOMMU for a specific device */ | ||
1392 | amd_iommu_rlookup_table = (void *)__get_free_pages( | ||
1393 | GFP_KERNEL | __GFP_ZERO, | ||
1394 | get_order(rlookup_table_size)); | ||
1395 | if (amd_iommu_rlookup_table == NULL) | ||
1396 | goto free; | ||
1397 | |||
1398 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( | ||
1399 | GFP_KERNEL | __GFP_ZERO, | ||
1400 | get_order(MAX_DOMAIN_ID/8)); | ||
1401 | if (amd_iommu_pd_alloc_bitmap == NULL) | ||
1402 | goto free; | ||
1403 | |||
1404 | /* init the device table */ | ||
1405 | init_device_table(); | ||
1406 | |||
1407 | /* | ||
1408 | * let all alias entries point to themselves | ||
1409 | */ | ||
1410 | for (i = 0; i <= amd_iommu_last_bdf; ++i) | ||
1411 | amd_iommu_alias_table[i] = i; | ||
1412 | |||
1413 | /* | ||
1414 | * never allocate domain 0 because it's used as the non-allocated and | ||
1415 | * error value placeholder | ||
1416 | */ | ||
1417 | amd_iommu_pd_alloc_bitmap[0] = 1; | ||
1418 | |||
1419 | spin_lock_init(&amd_iommu_pd_lock); | ||
1420 | |||
1421 | /* | ||
1422 | * now that the data structures are allocated and basically initialized, | ||
1423 | * start the real ACPI table scan | ||
1424 | */ | ||
1425 | ret = -ENODEV; | ||
1426 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) | ||
1427 | goto free; | ||
1428 | |||
1429 | if (amd_iommu_init_err) { | ||
1430 | ret = amd_iommu_init_err; | ||
1431 | goto free; | ||
1432 | } | ||
1433 | |||
1434 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | ||
1435 | goto free; | ||
1436 | |||
1437 | if (amd_iommu_init_err) { | ||
1438 | ret = amd_iommu_init_err; | ||
1439 | goto free; | ||
1440 | } | ||
1441 | |||
1442 | ret = amd_iommu_init_devices(); | ||
1443 | if (ret) | ||
1444 | goto free; | ||
1445 | |||
1446 | enable_iommus(); | ||
1447 | |||
1448 | if (iommu_pass_through) | ||
1449 | ret = amd_iommu_init_passthrough(); | ||
1450 | else | ||
1451 | ret = amd_iommu_init_dma_ops(); | ||
1452 | |||
1453 | if (ret) | ||
1454 | goto free_disable; | ||
1455 | |||
1456 | amd_iommu_init_api(); | ||
1457 | |||
1458 | amd_iommu_init_notifier(); | ||
1459 | |||
1460 | register_syscore_ops(&amd_iommu_syscore_ops); | ||
1461 | |||
1462 | if (iommu_pass_through) | ||
1463 | goto out; | ||
1464 | |||
1465 | if (amd_iommu_unmap_flush) | ||
1466 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); | ||
1467 | else | ||
1468 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); | ||
1469 | |||
1470 | x86_platform.iommu_shutdown = disable_iommus; | ||
1471 | out: | ||
1472 | return ret; | ||
1473 | |||
1474 | free_disable: | ||
1475 | disable_iommus(); | ||
1476 | |||
1477 | free: | ||
1478 | amd_iommu_uninit_devices(); | ||
1479 | |||
1480 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, | ||
1481 | get_order(MAX_DOMAIN_ID/8)); | ||
1482 | |||
1483 | free_pages((unsigned long)amd_iommu_rlookup_table, | ||
1484 | get_order(rlookup_table_size)); | ||
1485 | |||
1486 | free_pages((unsigned long)amd_iommu_alias_table, | ||
1487 | get_order(alias_table_size)); | ||
1488 | |||
1489 | free_pages((unsigned long)amd_iommu_dev_table, | ||
1490 | get_order(dev_table_size)); | ||
1491 | |||
1492 | free_iommu_all(); | ||
1493 | |||
1494 | free_unity_maps(); | ||
1495 | |||
1496 | #ifdef CONFIG_GART_IOMMU | ||
1497 | /* | ||
1498 | * We failed to initialize the AMD IOMMU - try to fall back to GART | ||
1499 | * if possible. | ||
1500 | */ | ||
1501 | gart_iommu_init(); | ||
1502 | |||
1503 | #endif | ||
1504 | |||
1505 | goto out; | ||
1506 | } | ||
1507 | |||
1508 | /**************************************************************************** | ||
1509 | * | ||
1510 | * Early detect code. This code runs at IOMMU detection time in the DMA | ||
1511 | * layer. It just checks whether there is an IVRS ACPI table to detect | ||
1512 | * AMD IOMMUs | ||
1513 | * | ||
1514 | ****************************************************************************/ | ||
1515 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) | ||
1516 | { | ||
1517 | return 0; | ||
1518 | } | ||
1519 | |||
1520 | int __init amd_iommu_detect(void) | ||
1521 | { | ||
1522 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | ||
1523 | return -ENODEV; | ||
1524 | |||
1525 | if (amd_iommu_disabled) | ||
1526 | return -ENODEV; | ||
1527 | |||
1528 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | ||
1529 | iommu_detected = 1; | ||
1530 | amd_iommu_detected = 1; | ||
1531 | x86_init.iommu.iommu_init = amd_iommu_init; | ||
1532 | |||
1533 | /* Make sure ACS will be enabled */ | ||
1534 | pci_request_acs(); | ||
1535 | return 1; | ||
1536 | } | ||
1537 | return -ENODEV; | ||
1538 | } | ||
1539 | |||
1540 | /**************************************************************************** | ||
1541 | * | ||
1542 | * Parsing functions for the AMD IOMMU specific kernel command line | ||
1543 | * options. | ||
1544 | * | ||
1545 | ****************************************************************************/ | ||
1546 | |||
1547 | static int __init parse_amd_iommu_dump(char *str) | ||
1548 | { | ||
1549 | amd_iommu_dump = true; | ||
1550 | |||
1551 | return 1; | ||
1552 | } | ||
1553 | |||
1554 | static int __init parse_amd_iommu_options(char *str) | ||
1555 | { | ||
1556 | for (; *str; ++str) { | ||
1557 | if (strncmp(str, "fullflush", 9) == 0) | ||
1558 | amd_iommu_unmap_flush = true; | ||
1559 | if (strncmp(str, "off", 3) == 0) | ||
1560 | amd_iommu_disabled = true; | ||
1561 | } | ||
1562 | |||
1563 | return 1; | ||
1564 | } | ||
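A standalone sketch of the option scanning above, assuming a boot parameter of amd_iommu=fullflush; because the loop strncmp()s at every character offset, the keywords may appear anywhere inside the option string.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *str = "fullflush";  /* e.g. booting with amd_iommu=fullflush */
        bool unmap_flush = false, disabled = false;

        for (; *str; ++str) {
                if (strncmp(str, "fullflush", 9) == 0)
                        unmap_flush = true;
                if (strncmp(str, "off", 3) == 0)
                        disabled = true;
        }
        printf("fullflush=%d off=%d\n", unmap_flush, disabled); /* prints 1 0 */
        return 0;
}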
1565 | |||
1566 | __setup("amd_iommu_dump", parse_amd_iommu_dump); | ||
1567 | __setup("amd_iommu=", parse_amd_iommu_options); | ||
1568 | |||
1569 | IOMMU_INIT_FINISH(amd_iommu_detect, | ||
1570 | gart_iommu_hole_init, | ||
1571 | 0, | ||
1572 | 0); | ||
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 289e92862fd9..afdc3f756dea 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -27,15 +27,12 @@ | |||
27 | * timer, but by default APB timer has higher rating than local APIC timers. | 27 | * timer, but by default APB timer has higher rating than local APIC timers. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/clocksource.h> | ||
31 | #include <linux/clockchips.h> | ||
32 | #include <linux/delay.h> | 30 | #include <linux/delay.h> |
31 | #include <linux/dw_apb_timer.h> | ||
33 | #include <linux/errno.h> | 32 | #include <linux/errno.h> |
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
35 | #include <linux/sysdev.h> | ||
36 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
37 | #include <linux/pm.h> | 35 | #include <linux/pm.h> |
38 | #include <linux/pci.h> | ||
39 | #include <linux/sfi.h> | 36 | #include <linux/sfi.h> |
40 | #include <linux/interrupt.h> | 37 | #include <linux/interrupt.h> |
41 | #include <linux/cpu.h> | 38 | #include <linux/cpu.h> |
@@ -44,76 +41,48 @@ | |||
44 | #include <asm/fixmap.h> | 41 | #include <asm/fixmap.h> |
45 | #include <asm/apb_timer.h> | 42 | #include <asm/apb_timer.h> |
46 | #include <asm/mrst.h> | 43 | #include <asm/mrst.h> |
44 | #include <asm/time.h> | ||
47 | 45 | ||
48 | #define APBT_MASK CLOCKSOURCE_MASK(32) | ||
49 | #define APBT_SHIFT 22 | ||
50 | #define APBT_CLOCKEVENT_RATING 110 | 46 | #define APBT_CLOCKEVENT_RATING 110 |
51 | #define APBT_CLOCKSOURCE_RATING 250 | 47 | #define APBT_CLOCKSOURCE_RATING 250 |
52 | #define APBT_MIN_DELTA_USEC 200 | ||
53 | 48 | ||
54 | #define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt) | ||
55 | #define APBT_CLOCKEVENT0_NUM (0) | 49 | #define APBT_CLOCKEVENT0_NUM (0) |
56 | #define APBT_CLOCKEVENT1_NUM (1) | ||
57 | #define APBT_CLOCKSOURCE_NUM (2) | 50 | #define APBT_CLOCKSOURCE_NUM (2) |
58 | 51 | ||
59 | static unsigned long apbt_address; | 52 | static phys_addr_t apbt_address; |
60 | static int apb_timer_block_enabled; | 53 | static int apb_timer_block_enabled; |
61 | static void __iomem *apbt_virt_address; | 54 | static void __iomem *apbt_virt_address; |
62 | static int phy_cs_timer_id; | ||
63 | 55 | ||
64 | /* | 56 | /* |
65 | * Common DW APB timer info | 57 | * Common DW APB timer info |
66 | */ | 58 | */ |
67 | static uint64_t apbt_freq; | 59 | static unsigned long apbt_freq; |
68 | |||
69 | static void apbt_set_mode(enum clock_event_mode mode, | ||
70 | struct clock_event_device *evt); | ||
71 | static int apbt_next_event(unsigned long delta, | ||
72 | struct clock_event_device *evt); | ||
73 | static cycle_t apbt_read_clocksource(struct clocksource *cs); | ||
74 | static void apbt_restart_clocksource(struct clocksource *cs); | ||
75 | 60 | ||
76 | struct apbt_dev { | 61 | struct apbt_dev { |
77 | struct clock_event_device evt; | 62 | struct dw_apb_clock_event_device *timer; |
78 | unsigned int num; | 63 | unsigned int num; |
79 | int cpu; | 64 | int cpu; |
80 | unsigned int irq; | 65 | unsigned int irq; |
81 | unsigned int tick; | 66 | char name[10]; |
82 | unsigned int count; | ||
83 | unsigned int flags; | ||
84 | char name[10]; | ||
85 | }; | 67 | }; |
86 | 68 | ||
87 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); | 69 | static struct dw_apb_clocksource *clocksource_apbt; |
88 | 70 | ||
89 | #ifdef CONFIG_SMP | 71 | static inline void __iomem *adev_virt_addr(struct apbt_dev *adev) |
90 | static unsigned int apbt_num_timers_used; | ||
91 | static struct apbt_dev *apbt_devs; | ||
92 | #endif | ||
93 | |||
94 | static inline unsigned long apbt_readl_reg(unsigned long a) | ||
95 | { | 72 | { |
96 | return readl(apbt_virt_address + a); | 73 | return apbt_virt_address + adev->num * APBTMRS_REG_SIZE; |
97 | } | 74 | } |
98 | 75 | ||
99 | static inline void apbt_writel_reg(unsigned long d, unsigned long a) | 76 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); |
100 | { | ||
101 | writel(d, apbt_virt_address + a); | ||
102 | } | ||
103 | |||
104 | static inline unsigned long apbt_readl(int n, unsigned long a) | ||
105 | { | ||
106 | return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); | ||
107 | } | ||
108 | 77 | ||
109 | static inline void apbt_writel(int n, unsigned long d, unsigned long a) | 78 | #ifdef CONFIG_SMP |
110 | { | 79 | static unsigned int apbt_num_timers_used; |
111 | writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); | 80 | #endif |
112 | } | ||
113 | 81 | ||
114 | static inline void apbt_set_mapping(void) | 82 | static inline void apbt_set_mapping(void) |
115 | { | 83 | { |
116 | struct sfi_timer_table_entry *mtmr; | 84 | struct sfi_timer_table_entry *mtmr; |
85 | int phy_cs_timer_id = 0; | ||
117 | 86 | ||
118 | if (apbt_virt_address) { | 87 | if (apbt_virt_address) { |
119 | pr_debug("APBT base already mapped\n"); | 88 | pr_debug("APBT base already mapped\n"); |
@@ -125,21 +94,18 @@ static inline void apbt_set_mapping(void) | |||
125 | APBT_CLOCKEVENT0_NUM); | 94 | APBT_CLOCKEVENT0_NUM); |
126 | return; | 95 | return; |
127 | } | 96 | } |
128 | apbt_address = (unsigned long)mtmr->phys_addr; | 97 | apbt_address = (phys_addr_t)mtmr->phys_addr; |
129 | if (!apbt_address) { | 98 | if (!apbt_address) { |
130 | printk(KERN_WARNING "No timer base from SFI, use default\n"); | 99 | printk(KERN_WARNING "No timer base from SFI, use default\n"); |
131 | apbt_address = APBT_DEFAULT_BASE; | 100 | apbt_address = APBT_DEFAULT_BASE; |
132 | } | 101 | } |
133 | apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); | 102 | apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); |
134 | if (apbt_virt_address) { | 103 | if (!apbt_virt_address) { |
135 | pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ | 104 | pr_debug("Failed mapping APBT phy address at %lu\n",\ |
136 | (void *)apbt_address, (void *)apbt_virt_address); | 105 | (unsigned long)apbt_address); |
137 | } else { | ||
138 | pr_debug("Failed mapping APBT phy address at %p\n",\ | ||
139 | (void *)apbt_address); | ||
140 | goto panic_noapbt; | 106 | goto panic_noapbt; |
141 | } | 107 | } |
142 | apbt_freq = mtmr->freq_hz / USEC_PER_SEC; | 108 | apbt_freq = mtmr->freq_hz; |
143 | sfi_free_mtmr(mtmr); | 109 | sfi_free_mtmr(mtmr); |
144 | 110 | ||
145 | /* Now figure out the physical timer id for clocksource device */ | 111 | /* Now figure out the physical timer id for clocksource device */ |
@@ -148,9 +114,14 @@ static inline void apbt_set_mapping(void) | |||
148 | goto panic_noapbt; | 114 | goto panic_noapbt; |
149 | 115 | ||
150 | /* Now figure out the physical timer id */ | 116 | /* Now figure out the physical timer id */ |
151 | phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) | 117 | pr_debug("Use timer %d for clocksource\n", |
152 | / APBTMRS_REG_SIZE; | 118 | (int)(mtmr->phys_addr & 0xff) / APBTMRS_REG_SIZE); |
153 | pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); | 119 | phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) / |
120 | APBTMRS_REG_SIZE; | ||
121 | |||
122 | clocksource_apbt = dw_apb_clocksource_init(APBT_CLOCKSOURCE_RATING, | ||
123 | "apbt0", apbt_virt_address + phy_cs_timer_id * | ||
124 | APBTMRS_REG_SIZE, apbt_freq); | ||
154 | return; | 125 | return; |
155 | 126 | ||
156 | panic_noapbt: | 127 | panic_noapbt: |
@@ -172,82 +143,6 @@ static inline int is_apbt_capable(void) | |||
172 | return apbt_virt_address ? 1 : 0; | 143 | return apbt_virt_address ? 1 : 0; |
173 | } | 144 | } |
174 | 145 | ||
175 | static struct clocksource clocksource_apbt = { | ||
176 | .name = "apbt", | ||
177 | .rating = APBT_CLOCKSOURCE_RATING, | ||
178 | .read = apbt_read_clocksource, | ||
179 | .mask = APBT_MASK, | ||
180 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
181 | .resume = apbt_restart_clocksource, | ||
182 | }; | ||
183 | |||
184 | /* boot APB clock event device */ | ||
185 | static struct clock_event_device apbt_clockevent = { | ||
186 | .name = "apbt0", | ||
187 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
188 | .set_mode = apbt_set_mode, | ||
189 | .set_next_event = apbt_next_event, | ||
190 | .shift = APBT_SHIFT, | ||
191 | .irq = 0, | ||
192 | .rating = APBT_CLOCKEVENT_RATING, | ||
193 | }; | ||
194 | |||
195 | /* | ||
196 | * start count down from 0xffff_ffff. this is done by toggling the enable bit | ||
197 | * then load initial load count to ~0. | ||
198 | */ | ||
199 | static void apbt_start_counter(int n) | ||
200 | { | ||
201 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
202 | |||
203 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
204 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
205 | apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); | ||
206 | /* enable, mask interrupt */ | ||
207 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
208 | ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); | ||
209 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
210 | /* read it once to get cached counter value initialized */ | ||
211 | apbt_read_clocksource(&clocksource_apbt); | ||
212 | } | ||
213 | |||
214 | static irqreturn_t apbt_interrupt_handler(int irq, void *data) | ||
215 | { | ||
216 | struct apbt_dev *dev = (struct apbt_dev *)data; | ||
217 | struct clock_event_device *aevt = &dev->evt; | ||
218 | |||
219 | if (!aevt->event_handler) { | ||
220 | printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", | ||
221 | dev->num); | ||
222 | return IRQ_NONE; | ||
223 | } | ||
224 | aevt->event_handler(aevt); | ||
225 | return IRQ_HANDLED; | ||
226 | } | ||
227 | |||
228 | static void apbt_restart_clocksource(struct clocksource *cs) | ||
229 | { | ||
230 | apbt_start_counter(phy_cs_timer_id); | ||
231 | } | ||
232 | |||
233 | static void apbt_enable_int(int n) | ||
234 | { | ||
235 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
236 | /* clear pending intr */ | ||
237 | apbt_readl(n, APBTMR_N_EOI); | ||
238 | ctrl &= ~APBTMR_CONTROL_INT; | ||
239 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
240 | } | ||
241 | |||
242 | static void apbt_disable_int(int n) | ||
243 | { | ||
244 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
245 | |||
246 | ctrl |= APBTMR_CONTROL_INT; | ||
247 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
248 | } | ||
249 | |||
250 | |||
251 | static int __init apbt_clockevent_register(void) | 146 | static int __init apbt_clockevent_register(void) |
252 | { | 147 | { |
253 | struct sfi_timer_table_entry *mtmr; | 148 | struct sfi_timer_table_entry *mtmr; |
@@ -260,45 +155,21 @@ static int __init apbt_clockevent_register(void) | |||
260 | return -ENODEV; | 155 | return -ENODEV; |
261 | } | 156 | } |
262 | 157 | ||
263 | /* | ||
264 | * We need to calculate the scaled math multiplication factor for | ||
265 | * nanosecond to apbt tick conversion. | ||
266 | * mult = (nsec/cycle)*2^APBT_SHIFT | ||
267 | */ | ||
268 | apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz | ||
269 | , NSEC_PER_SEC, APBT_SHIFT); | ||
270 | |||
271 | /* Calculate the min / max delta */ | ||
272 | apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
273 | &apbt_clockevent); | ||
274 | apbt_clockevent.min_delta_ns = clockevent_delta2ns( | ||
275 | APBT_MIN_DELTA_USEC*apbt_freq, | ||
276 | &apbt_clockevent); | ||
277 | /* | ||
278 | * Start apbt with the boot cpu mask and make it | ||
279 | * global if not used for per cpu timer. | ||
280 | */ | ||
281 | apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); | ||
282 | adev->num = smp_processor_id(); | 158 | adev->num = smp_processor_id(); |
283 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); | 159 | adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0", |
160 | mrst_timer_options == MRST_TIMER_LAPIC_APBT ? | ||
161 | APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING, | ||
162 | adev_virt_addr(adev), 0, apbt_freq); | ||
163 | /* Firmware does EOI handling for us. */ | ||
164 | adev->timer->eoi = NULL; | ||
284 | 165 | ||
285 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { | 166 | if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { |
286 | adev->evt.rating = APBT_CLOCKEVENT_RATING - 100; | 167 | global_clock_event = &adev->timer->ced; |
287 | global_clock_event = &adev->evt; | ||
288 | printk(KERN_DEBUG "%s clockevent registered as global\n", | 168 | printk(KERN_DEBUG "%s clockevent registered as global\n", |
289 | global_clock_event->name); | 169 | global_clock_event->name); |
290 | } | 170 | } |
291 | 171 | ||
292 | if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, | 172 | dw_apb_clockevent_register(adev->timer); |
293 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
294 | apbt_clockevent.name, adev)) { | ||
295 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", | ||
296 | apbt_clockevent.irq); | ||
297 | } | ||
298 | |||
299 | clockevents_register_device(&adev->evt); | ||
300 | /* Start APBT 0 interrupts */ | ||
301 | apbt_enable_int(APBT_CLOCKEVENT0_NUM); | ||
302 | 173 | ||
303 | sfi_free_mtmr(mtmr); | 174 | sfi_free_mtmr(mtmr); |
304 | return 0; | 175 | return 0; |
@@ -316,52 +187,34 @@ static void apbt_setup_irq(struct apbt_dev *adev) | |||
316 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | 187 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); |
317 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ | 188 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ |
318 | __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge"); | 189 | __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge"); |
319 | |||
320 | if (system_state == SYSTEM_BOOTING) { | ||
321 | if (request_irq(adev->irq, apbt_interrupt_handler, | ||
322 | IRQF_TIMER | IRQF_DISABLED | | ||
323 | IRQF_NOBALANCING, | ||
324 | adev->name, adev)) { | ||
325 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", | ||
326 | adev->num); | ||
327 | } | ||
328 | } else | ||
329 | enable_irq(adev->irq); | ||
330 | } | 190 | } |
331 | 191 | ||
332 | /* Should be called with per cpu */ | 192 | /* Should be called with per cpu */ |
333 | void apbt_setup_secondary_clock(void) | 193 | void apbt_setup_secondary_clock(void) |
334 | { | 194 | { |
335 | struct apbt_dev *adev; | 195 | struct apbt_dev *adev; |
336 | struct clock_event_device *aevt; | ||
337 | int cpu; | 196 | int cpu; |
338 | 197 | ||
339 | /* Don't register boot CPU clockevent */ | 198 | /* Don't register boot CPU clockevent */ |
340 | cpu = smp_processor_id(); | 199 | cpu = smp_processor_id(); |
341 | if (!cpu) | 200 | if (!cpu) |
342 | return; | 201 | return; |
343 | /* | ||
344 | * We need to calculate the scaled math multiplication factor for | ||
345 | * nanosecond to apbt tick conversion. | ||
346 | * mult = (nsec/cycle)*2^APBT_SHIFT | ||
347 | */ | ||
348 | printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); | ||
349 | adev = &per_cpu(cpu_apbt_dev, cpu); | ||
350 | aevt = &adev->evt; | ||
351 | 202 | ||
352 | memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); | 203 | adev = &__get_cpu_var(cpu_apbt_dev); |
353 | aevt->cpumask = cpumask_of(cpu); | 204 | if (!adev->timer) { |
354 | aevt->name = adev->name; | 205 | adev->timer = dw_apb_clockevent_init(cpu, adev->name, |
355 | aevt->mode = CLOCK_EVT_MODE_UNUSED; | 206 | APBT_CLOCKEVENT_RATING, adev_virt_addr(adev), |
207 | adev->irq, apbt_freq); | ||
208 | adev->timer->eoi = NULL; | ||
209 | } else { | ||
210 | dw_apb_clockevent_resume(adev->timer); | ||
211 | } | ||
356 | 212 | ||
357 | printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", | 213 | printk(KERN_INFO "Registering CPU %d clockevent device %s, cpu %08x\n", |
358 | cpu, aevt->name, *(u32 *)aevt->cpumask); | 214 | cpu, adev->name, adev->cpu); |
359 | 215 | ||
360 | apbt_setup_irq(adev); | 216 | apbt_setup_irq(adev); |
361 | 217 | dw_apb_clockevent_register(adev->timer); | |
362 | clockevents_register_device(aevt); | ||
363 | |||
364 | apbt_enable_int(cpu); | ||
365 | 218 | ||
366 | return; | 219 | return; |
367 | } | 220 | } |
@@ -384,13 +237,12 @@ static int apbt_cpuhp_notify(struct notifier_block *n, | |||
384 | 237 | ||
385 | switch (action & 0xf) { | 238 | switch (action & 0xf) { |
386 | case CPU_DEAD: | 239 | case CPU_DEAD: |
387 | disable_irq(adev->irq); | 240 | dw_apb_clockevent_pause(adev->timer); |
388 | apbt_disable_int(cpu); | ||
389 | if (system_state == SYSTEM_RUNNING) { | 241 | if (system_state == SYSTEM_RUNNING) { |
390 | pr_debug("skipping APBT CPU %lu offline\n", cpu); | 242 | pr_debug("skipping APBT CPU %lu offline\n", cpu); |
391 | } else if (adev) { | 243 | } else if (adev) { |
392 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); | 244 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); |
393 | free_irq(adev->irq, adev); | 245 | dw_apb_clockevent_stop(adev->timer); |
394 | } | 246 | } |
395 | break; | 247 | break; |
396 | default: | 248 | default: |
@@ -415,116 +267,16 @@ void apbt_setup_secondary_clock(void) {} | |||
415 | 267 | ||
416 | #endif /* CONFIG_SMP */ | 268 | #endif /* CONFIG_SMP */ |
417 | 269 | ||
418 | static void apbt_set_mode(enum clock_event_mode mode, | ||
419 | struct clock_event_device *evt) | ||
420 | { | ||
421 | unsigned long ctrl; | ||
422 | uint64_t delta; | ||
423 | int timer_num; | ||
424 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
425 | |||
426 | BUG_ON(!apbt_virt_address); | ||
427 | |||
428 | timer_num = adev->num; | ||
429 | pr_debug("%s CPU %d timer %d mode=%d\n", | ||
430 | __func__, first_cpu(*evt->cpumask), timer_num, mode); | ||
431 | |||
432 | switch (mode) { | ||
433 | case CLOCK_EVT_MODE_PERIODIC: | ||
434 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; | ||
435 | delta >>= apbt_clockevent.shift; | ||
436 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
437 | ctrl |= APBTMR_CONTROL_MODE_PERIODIC; | ||
438 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
439 | /* | ||
440 | * DW APB p. 46, have to disable timer before load counter, | ||
441 | * may cause sync problem. | ||
442 | */ | ||
443 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
444 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
445 | udelay(1); | ||
446 | pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); | ||
447 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
448 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
449 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
450 | break; | ||
451 | /* APB timer does not have one-shot mode, use free running mode */ | ||
452 | case CLOCK_EVT_MODE_ONESHOT: | ||
453 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
454 | /* | ||
455 | * set free running mode, this mode will let timer reload max | ||
456 | * timeout which will give time (3min on 25MHz clock) to rearm | ||
457 | * the next event, therefore emulate the one-shot mode. | ||
458 | */ | ||
459 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
460 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
461 | |||
462 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
463 | /* write again to set free running mode */ | ||
464 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
465 | |||
466 | /* | ||
467 | * DW APB p. 46, load counter with all 1s before starting free | ||
468 | * running mode. | ||
469 | */ | ||
470 | apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); | ||
471 | ctrl &= ~APBTMR_CONTROL_INT; | ||
472 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
473 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
474 | break; | ||
475 | |||
476 | case CLOCK_EVT_MODE_UNUSED: | ||
477 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
478 | apbt_disable_int(timer_num); | ||
479 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
480 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
481 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
482 | break; | ||
483 | |||
484 | case CLOCK_EVT_MODE_RESUME: | ||
485 | apbt_enable_int(timer_num); | ||
486 | break; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | static int apbt_next_event(unsigned long delta, | ||
491 | struct clock_event_device *evt) | ||
492 | { | ||
493 | unsigned long ctrl; | ||
494 | int timer_num; | ||
495 | |||
496 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
497 | |||
498 | timer_num = adev->num; | ||
499 | /* Disable timer */ | ||
500 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
501 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
502 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
503 | /* write new count */ | ||
504 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
505 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
506 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | static cycle_t apbt_read_clocksource(struct clocksource *cs) | ||
511 | { | ||
512 | unsigned long current_count; | ||
513 | |||
514 | current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE); | ||
515 | return (cycle_t)~current_count; | ||
516 | } | ||
517 | |||
518 | static int apbt_clocksource_register(void) | 270 | static int apbt_clocksource_register(void) |
519 | { | 271 | { |
520 | u64 start, now; | 272 | u64 start, now; |
521 | cycle_t t1; | 273 | cycle_t t1; |
522 | 274 | ||
523 | /* Start the counter, use timer 2 as source, timer 0/1 for event */ | 275 | /* Start the counter, use timer 2 as source, timer 0/1 for event */ |
524 | apbt_start_counter(phy_cs_timer_id); | 276 | dw_apb_clocksource_start(clocksource_apbt); |
525 | 277 | ||
526 | /* Verify whether apbt counter works */ | 278 | /* Verify whether apbt counter works */ |
527 | t1 = apbt_read_clocksource(&clocksource_apbt); | 279 | t1 = dw_apb_clocksource_read(clocksource_apbt); |
528 | rdtscll(start); | 280 | rdtscll(start); |
529 | 281 | ||
530 | /* | 282 | /* |
@@ -539,10 +291,10 @@ static int apbt_clocksource_register(void) | |||
539 | } while ((now - start) < 200000UL); | 291 | } while ((now - start) < 200000UL); |
540 | 292 | ||
541 | /* APBT is the only always on clocksource, it has to work! */ | 293 | /* APBT is the only always on clocksource, it has to work! */ |
542 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) | 294 | if (t1 == dw_apb_clocksource_read(clocksource_apbt)) |
543 | panic("APBT counter not counting. APBT disabled\n"); | 295 | panic("APBT counter not counting. APBT disabled\n"); |
544 | 296 | ||
545 | clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000); | 297 | dw_apb_clocksource_register(clocksource_apbt); |
546 | 298 | ||
547 | return 0; | 299 | return 0; |
548 | } | 300 | } |
@@ -566,10 +318,7 @@ void __init apbt_time_init(void) | |||
566 | if (apb_timer_block_enabled) | 318 | if (apb_timer_block_enabled) |
567 | return; | 319 | return; |
568 | apbt_set_mapping(); | 320 | apbt_set_mapping(); |
569 | if (apbt_virt_address) { | 321 | if (!apbt_virt_address) |
570 | pr_debug("Found APBT version 0x%lx\n",\ | ||
571 | apbt_readl_reg(APBTMRS_COMP_VERSION)); | ||
572 | } else | ||
573 | goto out_noapbt; | 322 | goto out_noapbt; |
574 | /* | 323 | /* |
575 | * Read the frequency and check for a sane value, for ESL model | 324 | * Read the frequency and check for a sane value, for ESL model |
@@ -577,7 +326,7 @@ void __init apbt_time_init(void) | |||
577 | */ | 326 | */ |
578 | 327 | ||
579 | if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { | 328 | if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { |
580 | pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); | 329 | pr_debug("APBT has invalid freq 0x%lx\n", apbt_freq); |
581 | goto out_noapbt; | 330 | goto out_noapbt; |
582 | } | 331 | } |
583 | if (apbt_clocksource_register()) { | 332 | if (apbt_clocksource_register()) { |
@@ -603,30 +352,20 @@ void __init apbt_time_init(void) | |||
603 | } else { | 352 | } else { |
604 | percpu_timer = 0; | 353 | percpu_timer = 0; |
605 | apbt_num_timers_used = 1; | 354 | apbt_num_timers_used = 1; |
606 | adev = &per_cpu(cpu_apbt_dev, 0); | ||
607 | adev->flags &= ~APBT_DEV_USED; | ||
608 | } | 355 | } |
609 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); | 356 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); |
610 | 357 | ||
611 | /* here we set up per CPU timer data structure */ | 358 | /* here we set up per CPU timer data structure */ |
612 | apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, | ||
613 | GFP_KERNEL); | ||
614 | if (!apbt_devs) { | ||
615 | printk(KERN_ERR "Failed to allocate APB timer devices\n"); | ||
616 | return; | ||
617 | } | ||
618 | for (i = 0; i < apbt_num_timers_used; i++) { | 359 | for (i = 0; i < apbt_num_timers_used; i++) { |
619 | adev = &per_cpu(cpu_apbt_dev, i); | 360 | adev = &per_cpu(cpu_apbt_dev, i); |
620 | adev->num = i; | 361 | adev->num = i; |
621 | adev->cpu = i; | 362 | adev->cpu = i; |
622 | p_mtmr = sfi_get_mtmr(i); | 363 | p_mtmr = sfi_get_mtmr(i); |
623 | if (p_mtmr) { | 364 | if (p_mtmr) |
624 | adev->tick = p_mtmr->freq_hz; | ||
625 | adev->irq = p_mtmr->irq; | 365 | adev->irq = p_mtmr->irq; |
626 | } else | 366 | else |
627 | printk(KERN_ERR "Failed to get timer for cpu %d\n", i); | 367 | printk(KERN_ERR "Failed to get timer for cpu %d\n", i); |
628 | adev->count = 0; | 368 | snprintf(adev->name, sizeof(adev->name) - 1, "apbt%d", i); |
629 | sprintf(adev->name, "apbt%d", i); | ||
630 | } | 369 | } |
631 | #endif | 370 | #endif |
632 | 371 | ||
@@ -638,17 +377,8 @@ out_noapbt: | |||
638 | panic("failed to enable APB timer\n"); | 377 | panic("failed to enable APB timer\n"); |
639 | } | 378 | } |
640 | 379 | ||
641 | static inline void apbt_disable(int n) | ||
642 | { | ||
643 | if (is_apbt_capable()) { | ||
644 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
645 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
646 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
647 | } | ||
648 | } | ||
649 | |||
650 | /* called before apb_timer_enable, use early map */ | 380 | /* called before apb_timer_enable, use early map */ |
651 | unsigned long apbt_quick_calibrate() | 381 | unsigned long apbt_quick_calibrate(void) |
652 | { | 382 | { |
653 | int i, scale; | 383 | int i, scale; |
654 | u64 old, new; | 384 | u64 old, new; |
@@ -657,31 +387,31 @@ unsigned long apbt_quick_calibrate() | |||
657 | u32 loop, shift; | 387 | u32 loop, shift; |
658 | 388 | ||
659 | apbt_set_mapping(); | 389 | apbt_set_mapping(); |
660 | apbt_start_counter(phy_cs_timer_id); | 390 | dw_apb_clocksource_start(clocksource_apbt); |
661 | 391 | ||
662 | /* check if the timer can count down, otherwise return */ | 392 | /* check if the timer can count down, otherwise return */ |
663 | old = apbt_read_clocksource(&clocksource_apbt); | 393 | old = dw_apb_clocksource_read(clocksource_apbt); |
664 | i = 10000; | 394 | i = 10000; |
665 | while (--i) { | 395 | while (--i) { |
666 | if (old != apbt_read_clocksource(&clocksource_apbt)) | 396 | if (old != dw_apb_clocksource_read(clocksource_apbt)) |
667 | break; | 397 | break; |
668 | } | 398 | } |
669 | if (!i) | 399 | if (!i) |
670 | goto failed; | 400 | goto failed; |
671 | 401 | ||
672 | /* count 16 ms */ | 402 | /* count 16 ms */ |
673 | loop = (apbt_freq * 1000) << 4; | 403 | loop = (apbt_freq / 1000) << 4; |
674 | 404 | ||
675 | /* restart the timer to ensure it won't get to 0 in the calibration */ | 405 | /* restart the timer to ensure it won't get to 0 in the calibration */ |
676 | apbt_start_counter(phy_cs_timer_id); | 406 | dw_apb_clocksource_start(clocksource_apbt); |
677 | 407 | ||
678 | old = apbt_read_clocksource(&clocksource_apbt); | 408 | old = dw_apb_clocksource_read(clocksource_apbt); |
679 | old += loop; | 409 | old += loop; |
680 | 410 | ||
681 | t1 = __native_read_tsc(); | 411 | t1 = __native_read_tsc(); |
682 | 412 | ||
683 | do { | 413 | do { |
684 | new = apbt_read_clocksource(&clocksource_apbt); | 414 | new = dw_apb_clocksource_read(clocksource_apbt); |
685 | } while (new < old); | 415 | } while (new < old); |
686 | 416 | ||
687 | t2 = __native_read_tsc(); | 417 | t2 = __native_read_tsc(); |
@@ -693,7 +423,7 @@ unsigned long apbt_quick_calibrate() | |||
693 | return 0; | 423 | return 0; |
694 | } | 424 | } |
695 | scale = (int)div_u64((t2 - t1), loop >> shift); | 425 | scale = (int)div_u64((t2 - t1), loop >> shift); |
696 | khz = (scale * apbt_freq * 1000) >> shift; | 426 | khz = (scale * (apbt_freq / 1000)) >> shift; |
697 | printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); | 427 | printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); |
698 | return khz; | 428 | return khz; |
699 | failed: | 429 | failed: |
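Editor's note on the calibration hunk above: the loop and kHz constants change because apbt_freq now carries the raw SFI frequency in Hz rather than ticks-per-microsecond. A minimal, illustration-only sketch of the same arithmetic, assuming a 25 MHz APB timer and a made-up TSC delta (neither value is from the patch):

/*
 * Illustrative sketch of the apbt_quick_calibrate() arithmetic after
 * apbt_freq switched from MHz to Hz.  The 25 MHz timer and the TSC
 * delta are assumed example values.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long apbt_freq = 25000000UL;	/* Hz, assumed */
	uint64_t tsc_delta = 25600000ULL;	/* assumed TSC cycles over the window */
	unsigned int shift = 5;			/* same scaling shift as the kernel code */

	/* 16 ms worth of APB ticks: (ticks per ms) << 4 */
	unsigned long loop = (apbt_freq / 1000) << 4;

	unsigned long scale = (unsigned long)(tsc_delta / (loop >> shift));
	unsigned long khz = (scale * (apbt_freq / 1000)) >> shift;

	printf("loop = %lu ticks (16 ms), TSC ~ %lu kHz\n", loop, khz);
	return 0;
}

With these assumed inputs the result is 1,600,000 kHz, i.e. a 1.6 GHz TSC, which is what the removed "* 1000" variants would have produced when the frequency was still stored in MHz.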
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b9338b8cf420..b24be38c8cf8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/syscore_ops.h> | 27 | #include <linux/syscore_ops.h> |
28 | #include <linux/delay.h> | 28 | #include <linux/delay.h> |
29 | #include <linux/timex.h> | 29 | #include <linux/timex.h> |
30 | #include <linux/i8253.h> | ||
30 | #include <linux/dmar.h> | 31 | #include <linux/dmar.h> |
31 | #include <linux/init.h> | 32 | #include <linux/init.h> |
32 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
@@ -39,7 +40,6 @@ | |||
39 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
40 | #include <asm/atomic.h> | 41 | #include <asm/atomic.h> |
41 | #include <asm/mpspec.h> | 42 | #include <asm/mpspec.h> |
42 | #include <asm/i8253.h> | ||
43 | #include <asm/i8259.h> | 43 | #include <asm/i8259.h> |
44 | #include <asm/proto.h> | 44 | #include <asm/proto.h> |
45 | #include <asm/apic.h> | 45 | #include <asm/apic.h> |
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/hpet.h> | 48 | #include <asm/hpet.h> |
49 | #include <asm/idle.h> | 49 | #include <asm/idle.h> |
50 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
51 | #include <asm/time.h> | ||
51 | #include <asm/smp.h> | 52 | #include <asm/smp.h> |
52 | #include <asm/mce.h> | 53 | #include <asm/mce.h> |
53 | #include <asm/tsc.h> | 54 | #include <asm/tsc.h> |
@@ -1429,7 +1430,7 @@ void enable_x2apic(void) | |||
1429 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1430 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1430 | if (!(msr & X2APIC_ENABLE)) { | 1431 | if (!(msr & X2APIC_ENABLE)) { |
1431 | printk_once(KERN_INFO "Enabling x2apic\n"); | 1432 | printk_once(KERN_INFO "Enabling x2apic\n"); |
1432 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1433 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2); |
1433 | } | 1434 | } |
1434 | } | 1435 | } |
1435 | #endif /* CONFIG_X86_X2APIC */ | 1436 | #endif /* CONFIG_X86_X2APIC */ |
@@ -1943,10 +1944,28 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1943 | 1944 | ||
1944 | void __cpuinit generic_processor_info(int apicid, int version) | 1945 | void __cpuinit generic_processor_info(int apicid, int version) |
1945 | { | 1946 | { |
1946 | int cpu; | 1947 | int cpu, max = nr_cpu_ids; |
1948 | bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, | ||
1949 | phys_cpu_present_map); | ||
1950 | |||
1951 | /* | ||
1952 | * If boot cpu has not been detected yet, then only allow upto | ||
1953 | * nr_cpu_ids - 1 processors and keep one slot free for boot cpu | ||
1954 | */ | ||
1955 | if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && | ||
1956 | apicid != boot_cpu_physical_apicid) { | ||
1957 | int thiscpu = max + disabled_cpus - 1; | ||
1958 | |||
1959 | pr_warning( | ||
1960 | "ACPI: NR_CPUS/possible_cpus limit of %i almost" | ||
1961 | " reached. Keeping one slot for boot cpu." | ||
1962 | " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); | ||
1963 | |||
1964 | disabled_cpus++; | ||
1965 | return; | ||
1966 | } | ||
1947 | 1967 | ||
1948 | if (num_processors >= nr_cpu_ids) { | 1968 | if (num_processors >= nr_cpu_ids) { |
1949 | int max = nr_cpu_ids; | ||
1950 | int thiscpu = max + disabled_cpus; | 1969 | int thiscpu = max + disabled_cpus; |
1951 | 1970 | ||
1952 | pr_warning( | 1971 | pr_warning( |
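The generic_processor_info() change above reserves the final CPU slot for the boot CPU when firmware enumerates it late. A small stand-alone model of that bookkeeping, with invented nr_cpu_ids and APIC IDs:

/*
 * Toy model of the "keep one slot for the boot CPU" check.  NR_CPU_IDS,
 * the APIC IDs and the boot APIC ID are made-up example values.
 */
#include <stdio.h>
#include <stdbool.h>

#define NR_CPU_IDS	4
#define BOOT_APICID	0

static int num_processors;
static int disabled_cpus;
static bool boot_cpu_seen;

static void register_cpu(int apicid)
{
	/* The last free slot stays reserved until the boot CPU shows up. */
	if (!boot_cpu_seen && num_processors >= NR_CPU_IDS - 1 &&
	    apicid != BOOT_APICID) {
		printf("apicid 0x%x ignored, keeping one slot for boot cpu\n", apicid);
		disabled_cpus++;
		return;
	}
	if (num_processors >= NR_CPU_IDS) {
		printf("apicid 0x%x ignored, limit reached\n", apicid);
		disabled_cpus++;
		return;
	}
	if (apicid == BOOT_APICID)
		boot_cpu_seen = true;
	num_processors++;
	printf("apicid 0x%x registered (%d usable)\n", apicid, num_processors);
}

int main(void)
{
	int apicids[] = { 2, 4, 6, 8, 0 };	/* boot CPU enumerated last */

	for (unsigned i = 0; i < sizeof(apicids) / sizeof(apicids[0]); i++)
		register_cpu(apicids[i]);
	printf("disabled: %d\n", disabled_cpus);
	return 0;
}

APIC ID 8 is turned away even though a slot is technically free, so the late-arriving boot CPU (ID 0) can still be registered.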
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e5293394b548..8eb863e27ea6 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1295,6 +1295,16 @@ static int setup_ioapic_entry(int apic_id, int irq, | |||
1295 | * irq handler will do the explicit EOI to the io-apic. | 1295 | * irq handler will do the explicit EOI to the io-apic. |
1296 | */ | 1296 | */ |
1297 | ir_entry->vector = pin; | 1297 | ir_entry->vector = pin; |
1298 | |||
1299 | apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " | ||
1300 | "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " | ||
1301 | "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " | ||
1302 | "Avail:%X Vector:%02X Dest:%08X " | ||
1303 | "SID:%04X SQ:%X SVT:%X)\n", | ||
1304 | apic_id, irte.present, irte.fpd, irte.dst_mode, | ||
1305 | irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, | ||
1306 | irte.avail, irte.vector, irte.dest_id, | ||
1307 | irte.sid, irte.sq, irte.svt); | ||
1298 | } else { | 1308 | } else { |
1299 | entry->delivery_mode = apic->irq_delivery_mode; | 1309 | entry->delivery_mode = apic->irq_delivery_mode; |
1300 | entry->dest_mode = apic->irq_dest_mode; | 1310 | entry->dest_mode = apic->irq_dest_mode; |
@@ -1337,9 +1347,9 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | |||
1337 | 1347 | ||
1338 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1348 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1339 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1349 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
1340 | "IRQ %d Mode:%i Active:%i)\n", | 1350 | "IRQ %d Mode:%i Active:%i Dest:%d)\n", |
1341 | apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector, | 1351 | apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector, |
1342 | irq, trigger, polarity); | 1352 | irq, trigger, polarity, dest); |
1343 | 1353 | ||
1344 | 1354 | ||
1345 | if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry, | 1355 | if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry, |
@@ -1522,10 +1532,12 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1522 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | 1532 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); |
1523 | 1533 | ||
1524 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); | 1534 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); |
1525 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | 1535 | printk(KERN_DEBUG "....... : max redirection entries: %02X\n", |
1536 | reg_01.bits.entries); | ||
1526 | 1537 | ||
1527 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | 1538 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
1528 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | 1539 | printk(KERN_DEBUG "....... : IO APIC version: %02X\n", |
1540 | reg_01.bits.version); | ||
1529 | 1541 | ||
1530 | /* | 1542 | /* |
1531 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | 1543 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, |
@@ -1550,31 +1562,60 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1550 | 1562 | ||
1551 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1563 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1552 | 1564 | ||
1553 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | 1565 | if (intr_remapping_enabled) { |
1554 | " Stat Dmod Deli Vect:\n"); | 1566 | printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" |
1567 | " Pol Stat Indx2 Zero Vect:\n"); | ||
1568 | } else { | ||
1569 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | ||
1570 | " Stat Dmod Deli Vect:\n"); | ||
1571 | } | ||
1555 | 1572 | ||
1556 | for (i = 0; i <= reg_01.bits.entries; i++) { | 1573 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1557 | struct IO_APIC_route_entry entry; | 1574 | if (intr_remapping_enabled) { |
1558 | 1575 | struct IO_APIC_route_entry entry; | |
1559 | entry = ioapic_read_entry(apic, i); | 1576 | struct IR_IO_APIC_route_entry *ir_entry; |
1560 | 1577 | ||
1561 | printk(KERN_DEBUG " %02x %03X ", | 1578 | entry = ioapic_read_entry(apic, i); |
1562 | i, | 1579 | ir_entry = (struct IR_IO_APIC_route_entry *) &entry; |
1563 | entry.dest | 1580 | printk(KERN_DEBUG " %02x %04X ", |
1564 | ); | 1581 | i, |
1582 | ir_entry->index | ||
1583 | ); | ||
1584 | printk("%1d %1d %1d %1d %1d " | ||
1585 | "%1d %1d %X %02X\n", | ||
1586 | ir_entry->format, | ||
1587 | ir_entry->mask, | ||
1588 | ir_entry->trigger, | ||
1589 | ir_entry->irr, | ||
1590 | ir_entry->polarity, | ||
1591 | ir_entry->delivery_status, | ||
1592 | ir_entry->index2, | ||
1593 | ir_entry->zero, | ||
1594 | ir_entry->vector | ||
1595 | ); | ||
1596 | } else { | ||
1597 | struct IO_APIC_route_entry entry; | ||
1565 | 1598 | ||
1566 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", | 1599 | entry = ioapic_read_entry(apic, i); |
1567 | entry.mask, | 1600 | printk(KERN_DEBUG " %02x %02X ", |
1568 | entry.trigger, | 1601 | i, |
1569 | entry.irr, | 1602 | entry.dest |
1570 | entry.polarity, | 1603 | ); |
1571 | entry.delivery_status, | 1604 | printk("%1d %1d %1d %1d %1d " |
1572 | entry.dest_mode, | 1605 | "%1d %1d %02X\n", |
1573 | entry.delivery_mode, | 1606 | entry.mask, |
1574 | entry.vector | 1607 | entry.trigger, |
1575 | ); | 1608 | entry.irr, |
1609 | entry.polarity, | ||
1610 | entry.delivery_status, | ||
1611 | entry.dest_mode, | ||
1612 | entry.delivery_mode, | ||
1613 | entry.vector | ||
1614 | ); | ||
1615 | } | ||
1576 | } | 1616 | } |
1577 | } | 1617 | } |
1618 | |||
1578 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1619 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
1579 | for_each_active_irq(irq) { | 1620 | for_each_active_irq(irq) { |
1580 | struct irq_pin_list *entry; | 1621 | struct irq_pin_list *entry; |
@@ -1792,7 +1833,7 @@ __apicdebuginit(int) print_ICs(void) | |||
1792 | return 0; | 1833 | return 0; |
1793 | } | 1834 | } |
1794 | 1835 | ||
1795 | fs_initcall(print_ICs); | 1836 | late_initcall(print_ICs); |
1796 | 1837 | ||
1797 | 1838 | ||
1798 | /* Where if anywhere is the i8259 connect in external int mode */ | 1839 | /* Where if anywhere is the i8259 connect in external int mode */ |
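The print_IO_APIC() rework above adds a second dump format for remapped entries. A user-space sketch of the same field-by-field dump; the struct below only mirrors the fields being printed and is not claimed to match the hardware IRTE bit layout, and all values are invented:

/*
 * Toy decoder mirroring the remapped-entry dump added to print_IO_APIC().
 * Field set follows the printk above; the layout and sample values are
 * illustrative only.
 */
#include <stdio.h>

struct ir_route_entry {
	int index;		/* interrupt-remapping table index */
	int format;		/* 1 = remappable format */
	int mask, trigger, irr, polarity;
	int delivery_status, index2, zero;
	int vector;
};

static void dump_entry(int nr, const struct ir_route_entry *e)
{
	printf(" %02x %04X %1d   %1d    %1d    %1d   %1d   %1d    %1d     %X    %02X\n",
	       nr, e->index, e->format, e->mask, e->trigger, e->irr,
	       e->polarity, e->delivery_status, e->index2, e->zero, e->vector);
}

int main(void)
{
	struct ir_route_entry e = {
		.index = 0x12, .format = 1, .vector = 0x31,	/* example values */
	};

	printf(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
	dump_entry(0, &e);
	return 0;
}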
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 965a7666c283..0371c484bb8a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -229,11 +229,11 @@ | |||
229 | #include <linux/jiffies.h> | 229 | #include <linux/jiffies.h> |
230 | #include <linux/acpi.h> | 230 | #include <linux/acpi.h> |
231 | #include <linux/syscore_ops.h> | 231 | #include <linux/syscore_ops.h> |
232 | #include <linux/i8253.h> | ||
232 | 233 | ||
233 | #include <asm/system.h> | 234 | #include <asm/system.h> |
234 | #include <asm/uaccess.h> | 235 | #include <asm/uaccess.h> |
235 | #include <asm/desc.h> | 236 | #include <asm/desc.h> |
236 | #include <asm/i8253.h> | ||
237 | #include <asm/olpc.h> | 237 | #include <asm/olpc.h> |
238 | #include <asm/paravirt.h> | 238 | #include <asm/paravirt.h> |
239 | #include <asm/reboot.h> | 239 | #include <asm/reboot.h> |
@@ -1220,11 +1220,11 @@ static void reinit_timer(void) | |||
1220 | 1220 | ||
1221 | raw_spin_lock_irqsave(&i8253_lock, flags); | 1221 | raw_spin_lock_irqsave(&i8253_lock, flags); |
1222 | /* set the clock to HZ */ | 1222 | /* set the clock to HZ */ |
1223 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1223 | outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
1224 | udelay(10); | 1224 | udelay(10); |
1225 | outb_pit(LATCH & 0xff, PIT_CH0); /* LSB */ | 1225 | outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ |
1226 | udelay(10); | 1226 | udelay(10); |
1227 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ | 1227 | outb_p(LATCH >> 8, PIT_CH0); /* MSB */ |
1228 | udelay(10); | 1228 | udelay(10); |
1229 | raw_spin_unlock_irqrestore(&i8253_lock, flags); | 1229 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
1230 | #endif | 1230 | #endif |
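The apm_32.c hunk only swaps the accessor (outb_pit() to outb_p()); the PIT programming sequence itself is unchanged: mode byte 0x34, then the reload value LSB first. A worked example of the bytes written, assuming the standard 1.193182 MHz PIT input clock and HZ = 100 (an assumed configuration):

/*
 * Worked example of the reload value programmed by reinit_timer().
 * The bytes are only printed here, not written to I/O ports.
 */
#include <stdio.h>

#define PIT_TICK_RATE	1193182UL
#define HZ		100
#define LATCH		((PIT_TICK_RATE + HZ / 2) / HZ)	/* rounded divisor */

int main(void)
{
	printf("PIT_MODE <- 0x34 (binary, mode 2, LSB/MSB, channel 0)\n");
	printf("PIT_CH0  <- 0x%02lx (LSB of %lu)\n", LATCH & 0xff, LATCH);
	printf("PIT_CH0  <- 0x%02lx (MSB of %lu)\n", LATCH >> 8, LATCH);
	return 0;
}

For HZ = 100 the divisor is 11932 (0x2E9C), so the channel is fed 0x9C then 0x2E.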
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index c29d631af6fc..395a10e68067 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -63,7 +63,6 @@ void foo(void) | |||
63 | BLANK(); | 63 | BLANK(); |
64 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 64 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
65 | OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); | 65 | OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); |
66 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | ||
67 | 66 | ||
68 | BLANK(); | 67 | BLANK(); |
69 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); | 68 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 525514cf33c3..46674fbb62ba 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -62,6 +62,8 @@ static void __init check_fpu(void) | |||
62 | return; | 62 | return; |
63 | } | 63 | } |
64 | 64 | ||
65 | kernel_fpu_begin(); | ||
66 | |||
65 | /* | 67 | /* |
66 | * trap_init() enabled FXSR and company _before_ testing for FP | 68 | * trap_init() enabled FXSR and company _before_ testing for FP |
67 | * problems here. | 69 | * problems here. |
@@ -80,6 +82,8 @@ static void __init check_fpu(void) | |||
80 | : "=m" (*&fdiv_bug) | 82 | : "=m" (*&fdiv_bug) |
81 | : "m" (*&x), "m" (*&y)); | 83 | : "m" (*&x), "m" (*&y)); |
82 | 84 | ||
85 | kernel_fpu_end(); | ||
86 | |||
83 | boot_cpu_data.fdiv_bug = fdiv_bug; | 87 | boot_cpu_data.fdiv_bug = fdiv_bug; |
84 | if (boot_cpu_data.fdiv_bug) | 88 | if (boot_cpu_data.fdiv_bug) |
85 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); | 89 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); |
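The bugs.c change brackets the FDIV probe with kernel_fpu_begin()/kernel_fpu_end(). A minimal kernel-style sketch of that pattern, assuming the 3.0-era <asm/i387.h> declarations; the body is a placeholder comment rather than the actual probe:

/*
 * Sketch of the pattern added to check_fpu(): any in-kernel FPU/SSE use
 * must be bracketed so the current task's FPU state is saved and restored.
 */
#include <asm/i387.h>

static void example_fpu_section(void)
{
	kernel_fpu_begin();	/* preemption off, live FPU state saved */
	/* FP/SSE instructions would go here, e.g. the FDIV bug probe. */
	kernel_fpu_end();	/* restore state, re-enable preemption */
}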
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 8095f8611f8a..755f64fb0743 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -32,11 +32,11 @@ | |||
32 | */ | 32 | */ |
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | 33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = |
34 | { | 34 | { |
35 | &x86_hyper_vmware, | ||
36 | &x86_hyper_ms_hyperv, | ||
37 | #ifdef CONFIG_XEN_PVHVM | 35 | #ifdef CONFIG_XEN_PVHVM |
38 | &x86_hyper_xen_hvm, | 36 | &x86_hyper_xen_hvm, |
39 | #endif | 37 | #endif |
38 | &x86_hyper_vmware, | ||
39 | &x86_hyper_ms_hyperv, | ||
40 | }; | 40 | }; |
41 | 41 | ||
42 | const struct hypervisor_x86 *x86_hyper; | 42 | const struct hypervisor_x86 *x86_hyper; |
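The hypervisors[] reorder matters because detection in this code stops at the first ->detect() that returns true; putting Xen HVM ahead of VMware and Hyper-V presumably keeps a Xen HVM guest that also advertises Hyper-V-compatible CPUID leaves from being misdetected. A stand-alone model of that first-match scan, with stand-in detect callbacks (the real ones probe CPUID):

/*
 * Toy model of the ordered, first-match scan over hypervisors[].
 * The detect results below are assumed for the example.
 */
#include <stdio.h>
#include <stdbool.h>

struct hypervisor { const char *name; bool (*detect)(void); };

static bool detect_xen_hvm(void)   { return true;  }	/* assumed: Xen HVM guest */
static bool detect_vmware(void)    { return false; }
static bool detect_ms_hyperv(void) { return true;  }	/* compat leaves also present */

static const struct hypervisor hypervisors[] = {
	{ "Xen HVM",           detect_xen_hvm },	/* must be probed first */
	{ "VMware",            detect_vmware },
	{ "Microsoft Hyper-V", detect_ms_hyperv },
};

int main(void)
{
	for (unsigned i = 0; i < sizeof(hypervisors) / sizeof(hypervisors[0]); i++) {
		if (hypervisors[i].detect()) {
			printf("Hypervisor detected: %s\n", hypervisors[i].name);
			break;	/* first match wins, hence the ordering */
		}
	}
	return 0;
}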
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1edf5ba4fb2b..ed6086eedf1d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -456,6 +456,24 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
456 | 456 | ||
457 | if (cpu_has(c, X86_FEATURE_VMX)) | 457 | if (cpu_has(c, X86_FEATURE_VMX)) |
458 | detect_vmx_virtcap(c); | 458 | detect_vmx_virtcap(c); |
459 | |||
460 | /* | ||
461 | * Initialize MSR_IA32_ENERGY_PERF_BIAS if BIOS did not. | ||
462 | * x86_energy_perf_policy(8) is available to change it at run-time | ||
463 | */ | ||
464 | if (cpu_has(c, X86_FEATURE_EPB)) { | ||
465 | u64 epb; | ||
466 | |||
467 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | ||
468 | if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { | ||
469 | printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" | ||
470 | " Set to 'normal', was 'performance'\n" | ||
471 | "ENERGY_PERF_BIAS: View and update with" | ||
472 | " x86_energy_perf_policy(8)\n"); | ||
473 | epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; | ||
474 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | ||
475 | } | ||
476 | } | ||
459 | } | 477 | } |
460 | 478 | ||
461 | #ifdef CONFIG_X86_32 | 479 | #ifdef CONFIG_X86_32 |
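The intel.c hunk is a read-modify-write of the low nibble of MSR_IA32_ENERGY_PERF_BIAS. A stand-alone sketch of the same bit manipulation, with the MSR modeled as a plain variable; the 0x0 "performance" and 0x6 "normal" encodings are the values assumed from the kernel headers of this era:

/*
 * Sketch of the EPB fix-up: if the 4-bit bias field is still at the
 * power-on default ("performance"), move it to "normal".
 */
#include <stdio.h>
#include <stdint.h>

#define EPB_PERFORMANCE	0x0
#define EPB_NORMAL	0x6

int main(void)
{
	uint64_t epb = 0x0;	/* stands in for rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb) */

	if ((epb & 0xF) == EPB_PERFORMANCE) {
		epb = (epb & ~0xFULL) | EPB_NORMAL;
		printf("ENERGY_PERF_BIAS: set to 'normal', was 'performance'\n");
	}
	printf("new EPB value: 0x%llx\n", (unsigned long long)epb);
	return 0;
}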
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 1e8d66c1336a..7395d5f4272d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -43,61 +43,105 @@ static struct severity { | |||
43 | unsigned char covered; | 43 | unsigned char covered; |
44 | char *msg; | 44 | char *msg; |
45 | } severities[] = { | 45 | } severities[] = { |
46 | #define KERNEL .context = IN_KERNEL | 46 | #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } |
47 | #define USER .context = IN_USER | 47 | #define KERNEL .context = IN_KERNEL |
48 | #define SER .ser = SER_REQUIRED | 48 | #define USER .context = IN_USER |
49 | #define NOSER .ser = NO_SER | 49 | #define SER .ser = SER_REQUIRED |
50 | #define SEV(s) .sev = MCE_ ## s ## _SEVERITY | 50 | #define NOSER .ser = NO_SER |
51 | #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } | 51 | #define BITCLR(x) .mask = x, .result = 0 |
52 | #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } | 52 | #define BITSET(x) .mask = x, .result = x |
53 | #define MCGMASK(x, res, s, m, r...) \ | 53 | #define MCGMASK(x, y) .mcgmask = x, .mcgres = y |
54 | { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } | 54 | #define MASK(x, y) .mask = x, .result = y |
55 | #define MASK(x, y, s, m, r...) \ | ||
56 | { .mask = x, .result = y, SEV(s), .msg = m, ## r } | ||
57 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) | 55 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) |
58 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) | 56 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) |
59 | #define MCACOD 0xffff | 57 | #define MCACOD 0xffff |
60 | 58 | ||
61 | BITCLR(MCI_STATUS_VAL, NO, "Invalid"), | 59 | MCESEV( |
62 | BITCLR(MCI_STATUS_EN, NO, "Not enabled"), | 60 | NO, "Invalid", |
63 | BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), | 61 | BITCLR(MCI_STATUS_VAL) |
62 | ), | ||
63 | MCESEV( | ||
64 | NO, "Not enabled", | ||
65 | BITCLR(MCI_STATUS_EN) | ||
66 | ), | ||
67 | MCESEV( | ||
68 | PANIC, "Processor context corrupt", | ||
69 | BITSET(MCI_STATUS_PCC) | ||
70 | ), | ||
64 | /* When MCIP is not set something is very confused */ | 71 | /* When MCIP is not set something is very confused */ |
65 | MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), | 72 | MCESEV( |
73 | PANIC, "MCIP not set in MCA handler", | ||
74 | MCGMASK(MCG_STATUS_MCIP, 0) | ||
75 | ), | ||
66 | /* Neither return nor error IP -- no chance to recover -> PANIC */ | 76 | /* Neither return nor error IP -- no chance to recover -> PANIC */ |
67 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, | 77 | MCESEV( |
68 | "Neither restart nor error IP"), | 78 | PANIC, "Neither restart nor error IP", |
69 | MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", | 79 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) |
70 | KERNEL), | 80 | ), |
71 | BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), | 81 | MCESEV( |
72 | MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, | 82 | PANIC, "In kernel and no restart IP", |
73 | "Spurious not enabled", SER), | 83 | KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) |
84 | ), | ||
85 | MCESEV( | ||
86 | KEEP, "Corrected error", | ||
87 | NOSER, BITCLR(MCI_STATUS_UC) | ||
88 | ), | ||
74 | 89 | ||
75 | /* ignore OVER for UCNA */ | 90 | /* ignore OVER for UCNA */ |
76 | MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, | 91 | MCESEV( |
77 | "Uncorrected no action required", SER), | 92 | KEEP, "Uncorrected no action required", |
78 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, | 93 | SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) |
79 | "Illegal combination (UCNA with AR=1)", SER), | 94 | ), |
80 | MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), | 95 | MCESEV( |
96 | PANIC, "Illegal combination (UCNA with AR=1)", | ||
97 | SER, | ||
98 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR) | ||
99 | ), | ||
100 | MCESEV( | ||
101 | KEEP, "Non signalled machine check", | ||
102 | SER, BITCLR(MCI_STATUS_S) | ||
103 | ), | ||
81 | 104 | ||
82 | /* AR add known MCACODs here */ | 105 | /* AR add known MCACODs here */ |
83 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, | 106 | MCESEV( |
84 | "Action required with lost events", SER), | 107 | PANIC, "Action required with lost events", |
85 | MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, | 108 | SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) |
86 | "Action required; unknown MCACOD", SER), | 109 | ), |
110 | MCESEV( | ||
111 | PANIC, "Action required: unknown MCACOD", | ||
112 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) | ||
113 | ), | ||
87 | 114 | ||
88 | /* known AO MCACODs: */ | 115 | /* known AO MCACODs: */ |
89 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, | 116 | MCESEV( |
90 | "Action optional: memory scrubbing error", SER), | 117 | AO, "Action optional: memory scrubbing error", |
91 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, | 118 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) |
92 | "Action optional: last level cache writeback error", SER), | 119 | ), |
93 | 120 | MCESEV( | |
94 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, | 121 | AO, "Action optional: last level cache writeback error", |
95 | "Action optional unknown MCACOD", SER), | 122 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) |
96 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, | 123 | ), |
97 | "Action optional with lost events", SER), | 124 | MCESEV( |
98 | BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), | 125 | SOME, "Action optional: unknown MCACOD", |
99 | BITSET(MCI_STATUS_UC, UC, "Uncorrected"), | 126 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) |
100 | BITSET(0, SOME, "No match") /* always matches. keep at end */ | 127 | ), |
128 | MCESEV( | ||
129 | SOME, "Action optional with lost events", | ||
130 | SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S) | ||
131 | ), | ||
132 | |||
133 | MCESEV( | ||
134 | PANIC, "Overflowed uncorrected", | ||
135 | BITSET(MCI_STATUS_OVER|MCI_STATUS_UC) | ||
136 | ), | ||
137 | MCESEV( | ||
138 | UC, "Uncorrected", | ||
139 | BITSET(MCI_STATUS_UC) | ||
140 | ), | ||
141 | MCESEV( | ||
142 | SOME, "No match", | ||
143 | BITSET(0) | ||
144 | ) /* always matches. keep at end */ | ||
101 | }; | 145 | }; |
102 | 146 | ||
103 | /* | 147 | /* |
@@ -112,15 +156,15 @@ static int error_context(struct mce *m) | |||
112 | return IN_KERNEL; | 156 | return IN_KERNEL; |
113 | } | 157 | } |
114 | 158 | ||
115 | int mce_severity(struct mce *a, int tolerant, char **msg) | 159 | int mce_severity(struct mce *m, int tolerant, char **msg) |
116 | { | 160 | { |
117 | enum context ctx = error_context(a); | 161 | enum context ctx = error_context(m); |
118 | struct severity *s; | 162 | struct severity *s; |
119 | 163 | ||
120 | for (s = severities;; s++) { | 164 | for (s = severities;; s++) { |
121 | if ((a->status & s->mask) != s->result) | 165 | if ((m->status & s->mask) != s->result) |
122 | continue; | 166 | continue; |
123 | if ((a->mcgstatus & s->mcgmask) != s->mcgres) | 167 | if ((m->mcgstatus & s->mcgmask) != s->mcgres) |
124 | continue; | 168 | continue; |
125 | if (s->ser == SER_REQUIRED && !mce_ser) | 169 | if (s->ser == SER_REQUIRED && !mce_ser) |
126 | continue; | 170 | continue; |
@@ -197,15 +241,15 @@ static const struct file_operations severities_coverage_fops = { | |||
197 | 241 | ||
198 | static int __init severities_debugfs_init(void) | 242 | static int __init severities_debugfs_init(void) |
199 | { | 243 | { |
200 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | 244 | struct dentry *dmce, *fsev; |
201 | 245 | ||
202 | dmce = mce_get_debugfs_dir(); | 246 | dmce = mce_get_debugfs_dir(); |
203 | if (dmce == NULL) | 247 | if (!dmce) |
204 | goto err_out; | 248 | goto err_out; |
205 | fseverities_coverage = debugfs_create_file("severities-coverage", | 249 | |
206 | 0444, dmce, NULL, | 250 | fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, |
207 | &severities_coverage_fops); | 251 | &severities_coverage_fops); |
208 | if (fseverities_coverage == NULL) | 252 | if (!fsev) |
209 | goto err_out; | 253 | goto err_out; |
210 | 254 | ||
211 | return 0; | 255 | return 0; |
@@ -214,4 +258,4 @@ err_out: | |||
214 | return -ENOMEM; | 258 | return -ENOMEM; |
215 | } | 259 | } |
216 | late_initcall(severities_debugfs_init); | 260 | late_initcall(severities_debugfs_init); |
217 | #endif | 261 | #endif /* CONFIG_DEBUG_FS */ |
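The mce-severity.c rework replaces positional macro arguments with one MCESEV() macro whose tail is a list of designated-initializer fragments, so each rule reads top-down as severity, message, then match conditions, and the table is scanned first-match. A stand-alone illustration of that pattern with invented status bits and severities (GNU-style variadic macros, as in the original):

/*
 * Illustration of the MCESEV() style: macros that stitch designated
 * initializers into a rule table which is scanned first-match.
 */
#include <stdio.h>
#include <stdint.h>

#define ST_VAL	(1u << 0)
#define ST_UC	(1u << 1)
#define ST_OVER	(1u << 2)

enum sev { SEV_NO, SEV_SOME, SEV_PANIC };

struct rule {
	uint32_t mask, result;
	enum sev sev;
	const char *msg;
};

#define RULE(s, m, c...)	{ .sev = SEV_ ## s, .msg = m, ## c }
#define BITCLR(x)		.mask = (x), .result = 0
#define BITSET(x)		.mask = (x), .result = (x)

static const struct rule rules[] = {
	RULE(NO,    "Invalid",                BITCLR(ST_VAL)),
	RULE(PANIC, "Overflowed uncorrected", BITSET(ST_OVER | ST_UC)),
	RULE(SOME,  "No match",               BITSET(0)),	/* always matches, keep last */
};

static const struct rule *classify(uint32_t status)
{
	const struct rule *r;

	for (r = rules; ; r++)		/* the catch-all entry terminates the scan */
		if ((status & r->mask) == r->result)
			return r;
}

int main(void)
{
	uint32_t status = ST_VAL | ST_UC | ST_OVER;

	printf("severity: %s\n", classify(status)->msg);
	return 0;
}

The win is readability: adding a condition to a rule is a new fragment rather than another positional argument threaded through every macro.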
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff1ae9b6464d..08363b042122 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/thread_info.h> | 10 | #include <linux/thread_info.h> |
11 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
12 | #include <linux/miscdevice.h> | 12 | #include <linux/miscdevice.h> |
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/ratelimit.h> | 13 | #include <linux/ratelimit.h> |
15 | #include <linux/kallsyms.h> | 14 | #include <linux/kallsyms.h> |
16 | #include <linux/rcupdate.h> | 15 | #include <linux/rcupdate.h> |
@@ -38,23 +37,20 @@ | |||
38 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
39 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
40 | #include <linux/edac_mce.h> | 39 | #include <linux/edac_mce.h> |
40 | #include <linux/irq_work.h> | ||
41 | 41 | ||
42 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
43 | #include <asm/hw_irq.h> | ||
44 | #include <asm/apic.h> | ||
45 | #include <asm/idle.h> | ||
46 | #include <asm/ipi.h> | ||
47 | #include <asm/mce.h> | 43 | #include <asm/mce.h> |
48 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
49 | 45 | ||
50 | #include "mce-internal.h" | 46 | #include "mce-internal.h" |
51 | 47 | ||
52 | static DEFINE_MUTEX(mce_read_mutex); | 48 | static DEFINE_MUTEX(mce_chrdev_read_mutex); |
53 | 49 | ||
54 | #define rcu_dereference_check_mce(p) \ | 50 | #define rcu_dereference_check_mce(p) \ |
55 | rcu_dereference_index_check((p), \ | 51 | rcu_dereference_index_check((p), \ |
56 | rcu_read_lock_sched_held() || \ | 52 | rcu_read_lock_sched_held() || \ |
57 | lockdep_is_held(&mce_read_mutex)) | 53 | lockdep_is_held(&mce_chrdev_read_mutex)) |
58 | 54 | ||
59 | #define CREATE_TRACE_POINTS | 55 | #define CREATE_TRACE_POINTS |
60 | #include <trace/events/mce.h> | 56 | #include <trace/events/mce.h> |
@@ -94,7 +90,8 @@ static unsigned long mce_need_notify; | |||
94 | static char mce_helper[128]; | 90 | static char mce_helper[128]; |
95 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | 91 | static char *mce_helper_argv[2] = { mce_helper, NULL }; |
96 | 92 | ||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 93 | static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); |
94 | |||
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | 95 | static DEFINE_PER_CPU(struct mce, mces_seen); |
99 | static int cpu_missing; | 96 | static int cpu_missing; |
100 | 97 | ||
@@ -373,6 +370,31 @@ static void mce_wrmsrl(u32 msr, u64 v) | |||
373 | } | 370 | } |
374 | 371 | ||
375 | /* | 372 | /* |
373 | * Collect all global (w.r.t. this processor) status about this machine | ||
374 | * check into our "mce" struct so that we can use it later to assess | ||
375 | * the severity of the problem as we read per-bank specific details. | ||
376 | */ | ||
377 | static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) | ||
378 | { | ||
379 | mce_setup(m); | ||
380 | |||
381 | m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
382 | if (regs) { | ||
383 | /* | ||
384 | * Get the address of the instruction at the time of | ||
385 | * the machine check error. | ||
386 | */ | ||
387 | if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { | ||
388 | m->ip = regs->ip; | ||
389 | m->cs = regs->cs; | ||
390 | } | ||
391 | /* Use accurate RIP reporting if available. */ | ||
392 | if (rip_msr) | ||
393 | m->ip = mce_rdmsrl(rip_msr); | ||
394 | } | ||
395 | } | ||
396 | |||
397 | /* | ||
376 | * Simple lockless ring to communicate PFNs from the exception handler with the | 398 | * Simple lockless ring to communicate PFNs from the exception handler with the |
377 | * process context work function. This is vastly simplified because there's | 399 | * process context work function. This is vastly simplified because there's |
378 | * only a single reader and a single writer. | 400 | * only a single reader and a single writer. |
@@ -443,40 +465,13 @@ static void mce_schedule_work(void) | |||
443 | } | 465 | } |
444 | } | 466 | } |
445 | 467 | ||
446 | /* | 468 | DEFINE_PER_CPU(struct irq_work, mce_irq_work); |
447 | * Get the address of the instruction at the time of the machine check | ||
448 | * error. | ||
449 | */ | ||
450 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
451 | { | ||
452 | |||
453 | if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { | ||
454 | m->ip = regs->ip; | ||
455 | m->cs = regs->cs; | ||
456 | } else { | ||
457 | m->ip = 0; | ||
458 | m->cs = 0; | ||
459 | } | ||
460 | if (rip_msr) | ||
461 | m->ip = mce_rdmsrl(rip_msr); | ||
462 | } | ||
463 | 469 | ||
464 | #ifdef CONFIG_X86_LOCAL_APIC | 470 | static void mce_irq_work_cb(struct irq_work *entry) |
465 | /* | ||
466 | * Called after interrupts have been reenabled again | ||
467 | * when a MCE happened during an interrupts off region | ||
468 | * in the kernel. | ||
469 | */ | ||
470 | asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) | ||
471 | { | 471 | { |
472 | ack_APIC_irq(); | ||
473 | exit_idle(); | ||
474 | irq_enter(); | ||
475 | mce_notify_irq(); | 472 | mce_notify_irq(); |
476 | mce_schedule_work(); | 473 | mce_schedule_work(); |
477 | irq_exit(); | ||
478 | } | 474 | } |
479 | #endif | ||
480 | 475 | ||
481 | static void mce_report_event(struct pt_regs *regs) | 476 | static void mce_report_event(struct pt_regs *regs) |
482 | { | 477 | { |
@@ -492,29 +487,7 @@ static void mce_report_event(struct pt_regs *regs) | |||
492 | return; | 487 | return; |
493 | } | 488 | } |
494 | 489 | ||
495 | #ifdef CONFIG_X86_LOCAL_APIC | 490 | irq_work_queue(&__get_cpu_var(mce_irq_work)); |
496 | /* | ||
497 | * Without APIC do not notify. The event will be picked | ||
498 | * up eventually. | ||
499 | */ | ||
500 | if (!cpu_has_apic) | ||
501 | return; | ||
502 | |||
503 | /* | ||
504 | * When interrupts are disabled we cannot use | ||
505 | * kernel services safely. Trigger a self interrupt | ||
506 | * through the APIC to instead do the notification | ||
507 | * after interrupts are reenabled again. | ||
508 | */ | ||
509 | apic->send_IPI_self(MCE_SELF_VECTOR); | ||
510 | |||
511 | /* | ||
512 | * Wait for idle afterwards again so that we don't leave the | ||
513 | * APIC in a non idle state because the normal APIC writes | ||
514 | * cannot exclude us. | ||
515 | */ | ||
516 | apic_wait_icr_idle(); | ||
517 | #endif | ||
518 | } | 491 | } |
519 | 492 | ||
520 | DEFINE_PER_CPU(unsigned, mce_poll_count); | 493 | DEFINE_PER_CPU(unsigned, mce_poll_count); |
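The two hunks above drop the hand-rolled MCE self-IPI in favor of the generic irq_work machinery: a per-CPU struct irq_work is initialized with a callback and queued from the exception path, and the callback runs once interrupts are enabled again. A minimal kernel-style sketch of that pattern, using only the init_irq_work()/irq_work_queue() calls visible in the patch:

/*
 * Minimal sketch of the irq_work pattern adopted above: queue work from
 * a context where interrupts may be off (such as #MC) and let the
 * callback run once they are back on.
 */
#include <linux/kernel.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(struct irq_work, example_irq_work);

static void example_irq_work_cb(struct irq_work *work)
{
	pr_info("deferred work running with interrupts enabled\n");
}

static void example_init(void)
{
	init_irq_work(&__get_cpu_var(example_irq_work), example_irq_work_cb);
}

/* Safe even with interrupts disabled, unlike a direct wakeup. */
static void example_poke(void)
{
	irq_work_queue(&__get_cpu_var(example_irq_work));
}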
@@ -541,9 +514,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
541 | 514 | ||
542 | percpu_inc(mce_poll_count); | 515 | percpu_inc(mce_poll_count); |
543 | 516 | ||
544 | mce_setup(&m); | 517 | mce_gather_info(&m, NULL); |
545 | 518 | ||
546 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
547 | for (i = 0; i < banks; i++) { | 519 | for (i = 0; i < banks; i++) { |
548 | if (!mce_banks[i].ctl || !test_bit(i, *b)) | 520 | if (!mce_banks[i].ctl || !test_bit(i, *b)) |
549 | continue; | 521 | continue; |
@@ -879,9 +851,9 @@ static int mce_usable_address(struct mce *m) | |||
879 | { | 851 | { |
880 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | 852 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) |
881 | return 0; | 853 | return 0; |
882 | if ((m->misc & 0x3f) > PAGE_SHIFT) | 854 | if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) |
883 | return 0; | 855 | return 0; |
884 | if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) | 856 | if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) |
885 | return 0; | 857 | return 0; |
886 | return 1; | 858 | return 1; |
887 | } | 859 | } |
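The mce_usable_address() hunk swaps open-coded shifts for named accessors; per the removed code, bits [5:0] of MCi_MISC give the least significant valid address bit and bits [8:6] give the address mode. A stand-alone decoder over those same positions, with an invented MISC value and the physical-address mode encoded as 2, matching this kernel's headers:

/*
 * Decoder for the MCi_MISC fields named by the new accessors; bit
 * positions come from the open-coded version being removed above.
 */
#include <stdio.h>
#include <stdint.h>

#define MCI_MISC_ADDR_LSB(m)	((m) & 0x3f)
#define MCI_MISC_ADDR_MODE(m)	(((m) >> 6) & 7)
#define MCI_MISC_ADDR_PHYS	2	/* physical address mode */
#define PAGE_SHIFT		12

int main(void)
{
	uint64_t misc = (MCI_MISC_ADDR_PHYS << 6) | 6;	/* mode=phys, LSB=6 (64-byte granule) */

	int usable = MCI_MISC_ADDR_LSB(misc) <= PAGE_SHIFT &&
		     MCI_MISC_ADDR_MODE(misc) == MCI_MISC_ADDR_PHYS;

	printf("lsb=%llu mode=%llu usable=%d\n",
	       (unsigned long long)MCI_MISC_ADDR_LSB(misc),
	       (unsigned long long)MCI_MISC_ADDR_MODE(misc), usable);
	return 0;
}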
@@ -942,9 +914,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
942 | if (!banks) | 914 | if (!banks) |
943 | goto out; | 915 | goto out; |
944 | 916 | ||
945 | mce_setup(&m); | 917 | mce_gather_info(&m, regs); |
946 | 918 | ||
947 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
948 | final = &__get_cpu_var(mces_seen); | 919 | final = &__get_cpu_var(mces_seen); |
949 | *final = m; | 920 | *final = m; |
950 | 921 | ||
@@ -1028,7 +999,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1028 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | 999 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) |
1029 | mce_ring_add(m.addr >> PAGE_SHIFT); | 1000 | mce_ring_add(m.addr >> PAGE_SHIFT); |
1030 | 1001 | ||
1031 | mce_get_rip(&m, regs); | ||
1032 | mce_log(&m); | 1002 | mce_log(&m); |
1033 | 1003 | ||
1034 | if (severity > worst) { | 1004 | if (severity > worst) { |
@@ -1190,7 +1160,8 @@ int mce_notify_irq(void) | |||
1190 | clear_thread_flag(TIF_MCE_NOTIFY); | 1160 | clear_thread_flag(TIF_MCE_NOTIFY); |
1191 | 1161 | ||
1192 | if (test_and_clear_bit(0, &mce_need_notify)) { | 1162 | if (test_and_clear_bit(0, &mce_need_notify)) { |
1193 | wake_up_interruptible(&mce_wait); | 1163 | /* wake processes polling /dev/mcelog */ |
1164 | wake_up_interruptible(&mce_chrdev_wait); | ||
1194 | 1165 | ||
1195 | /* | 1166 | /* |
1196 | * There is no risk of missing notifications because | 1167 | * There is no risk of missing notifications because |
@@ -1363,18 +1334,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1363 | return 0; | 1334 | return 0; |
1364 | } | 1335 | } |
1365 | 1336 | ||
1366 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) | 1337 | static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1367 | { | 1338 | { |
1368 | if (c->x86 != 5) | 1339 | if (c->x86 != 5) |
1369 | return; | 1340 | return 0; |
1341 | |||
1370 | switch (c->x86_vendor) { | 1342 | switch (c->x86_vendor) { |
1371 | case X86_VENDOR_INTEL: | 1343 | case X86_VENDOR_INTEL: |
1372 | intel_p5_mcheck_init(c); | 1344 | intel_p5_mcheck_init(c); |
1345 | return 1; | ||
1373 | break; | 1346 | break; |
1374 | case X86_VENDOR_CENTAUR: | 1347 | case X86_VENDOR_CENTAUR: |
1375 | winchip_mcheck_init(c); | 1348 | winchip_mcheck_init(c); |
1349 | return 1; | ||
1376 | break; | 1350 | break; |
1377 | } | 1351 | } |
1352 | |||
1353 | return 0; | ||
1378 | } | 1354 | } |
1379 | 1355 | ||
1380 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | 1356 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
@@ -1428,7 +1404,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1428 | if (mce_disabled) | 1404 | if (mce_disabled) |
1429 | return; | 1405 | return; |
1430 | 1406 | ||
1431 | __mcheck_cpu_ancient_init(c); | 1407 | if (__mcheck_cpu_ancient_init(c)) |
1408 | return; | ||
1432 | 1409 | ||
1433 | if (!mce_available(c)) | 1410 | if (!mce_available(c)) |
1434 | return; | 1411 | return; |
@@ -1444,44 +1421,45 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1444 | __mcheck_cpu_init_vendor(c); | 1421 | __mcheck_cpu_init_vendor(c); |
1445 | __mcheck_cpu_init_timer(); | 1422 | __mcheck_cpu_init_timer(); |
1446 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1423 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1447 | 1424 | init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); | |
1448 | } | 1425 | } |
1449 | 1426 | ||
1450 | /* | 1427 | /* |
1451 | * Character device to read and clear the MCE log. | 1428 | * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. |
1452 | */ | 1429 | */ |
1453 | 1430 | ||
1454 | static DEFINE_SPINLOCK(mce_state_lock); | 1431 | static DEFINE_SPINLOCK(mce_chrdev_state_lock); |
1455 | static int open_count; /* #times opened */ | 1432 | static int mce_chrdev_open_count; /* #times opened */ |
1456 | static int open_exclu; /* already open exclusive? */ | 1433 | static int mce_chrdev_open_exclu; /* already open exclusive? */ |
1457 | 1434 | ||
1458 | static int mce_open(struct inode *inode, struct file *file) | 1435 | static int mce_chrdev_open(struct inode *inode, struct file *file) |
1459 | { | 1436 | { |
1460 | spin_lock(&mce_state_lock); | 1437 | spin_lock(&mce_chrdev_state_lock); |
1461 | 1438 | ||
1462 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | 1439 | if (mce_chrdev_open_exclu || |
1463 | spin_unlock(&mce_state_lock); | 1440 | (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { |
1441 | spin_unlock(&mce_chrdev_state_lock); | ||
1464 | 1442 | ||
1465 | return -EBUSY; | 1443 | return -EBUSY; |
1466 | } | 1444 | } |
1467 | 1445 | ||
1468 | if (file->f_flags & O_EXCL) | 1446 | if (file->f_flags & O_EXCL) |
1469 | open_exclu = 1; | 1447 | mce_chrdev_open_exclu = 1; |
1470 | open_count++; | 1448 | mce_chrdev_open_count++; |
1471 | 1449 | ||
1472 | spin_unlock(&mce_state_lock); | 1450 | spin_unlock(&mce_chrdev_state_lock); |
1473 | 1451 | ||
1474 | return nonseekable_open(inode, file); | 1452 | return nonseekable_open(inode, file); |
1475 | } | 1453 | } |
1476 | 1454 | ||
1477 | static int mce_release(struct inode *inode, struct file *file) | 1455 | static int mce_chrdev_release(struct inode *inode, struct file *file) |
1478 | { | 1456 | { |
1479 | spin_lock(&mce_state_lock); | 1457 | spin_lock(&mce_chrdev_state_lock); |
1480 | 1458 | ||
1481 | open_count--; | 1459 | mce_chrdev_open_count--; |
1482 | open_exclu = 0; | 1460 | mce_chrdev_open_exclu = 0; |
1483 | 1461 | ||
1484 | spin_unlock(&mce_state_lock); | 1462 | spin_unlock(&mce_chrdev_state_lock); |
1485 | 1463 | ||
1486 | return 0; | 1464 | return 0; |
1487 | } | 1465 | } |
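The renamed mce_chrdev_open()/mce_chrdev_release() keep the old semantics: an O_EXCL opener is refused while the device is already open, and every opener is refused while an exclusive one holds it. A small user-space sketch of taking the device exclusively (error handling kept minimal):

/* Open /dev/mcelog exclusively; the kernel side above returns -EBUSY
 * if another reader (e.g. the mcelog daemon) already has it open. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY | O_EXCL);

	if (fd < 0) {
		if (errno == EBUSY)
			fprintf(stderr, "mcelog device is already in use\n");
		else
			fprintf(stderr, "open: %s\n", strerror(errno));
		return 1;
	}
	/* ... read records here ... */
	close(fd);
	return 0;
}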
@@ -1530,8 +1508,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize) | |||
1530 | return 0; | 1508 | return 0; |
1531 | } | 1509 | } |
1532 | 1510 | ||
1533 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1511 | static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, |
1534 | loff_t *off) | 1512 | size_t usize, loff_t *off) |
1535 | { | 1513 | { |
1536 | char __user *buf = ubuf; | 1514 | char __user *buf = ubuf; |
1537 | unsigned long *cpu_tsc; | 1515 | unsigned long *cpu_tsc; |
@@ -1542,7 +1520,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1542 | if (!cpu_tsc) | 1520 | if (!cpu_tsc) |
1543 | return -ENOMEM; | 1521 | return -ENOMEM; |
1544 | 1522 | ||
1545 | mutex_lock(&mce_read_mutex); | 1523 | mutex_lock(&mce_chrdev_read_mutex); |
1546 | 1524 | ||
1547 | if (!mce_apei_read_done) { | 1525 | if (!mce_apei_read_done) { |
1548 | err = __mce_read_apei(&buf, usize); | 1526 | err = __mce_read_apei(&buf, usize); |
@@ -1562,19 +1540,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1562 | do { | 1540 | do { |
1563 | for (i = prev; i < next; i++) { | 1541 | for (i = prev; i < next; i++) { |
1564 | unsigned long start = jiffies; | 1542 | unsigned long start = jiffies; |
1543 | struct mce *m = &mcelog.entry[i]; | ||
1565 | 1544 | ||
1566 | while (!mcelog.entry[i].finished) { | 1545 | while (!m->finished) { |
1567 | if (time_after_eq(jiffies, start + 2)) { | 1546 | if (time_after_eq(jiffies, start + 2)) { |
1568 | memset(mcelog.entry + i, 0, | 1547 | memset(m, 0, sizeof(*m)); |
1569 | sizeof(struct mce)); | ||
1570 | goto timeout; | 1548 | goto timeout; |
1571 | } | 1549 | } |
1572 | cpu_relax(); | 1550 | cpu_relax(); |
1573 | } | 1551 | } |
1574 | smp_rmb(); | 1552 | smp_rmb(); |
1575 | err |= copy_to_user(buf, mcelog.entry + i, | 1553 | err |= copy_to_user(buf, m, sizeof(*m)); |
1576 | sizeof(struct mce)); | 1554 | buf += sizeof(*m); |
1577 | buf += sizeof(struct mce); | ||
1578 | timeout: | 1555 | timeout: |
1579 | ; | 1556 | ; |
1580 | } | 1557 | } |
@@ -1594,13 +1571,13 @@ timeout: | |||
1594 | on_each_cpu(collect_tscs, cpu_tsc, 1); | 1571 | on_each_cpu(collect_tscs, cpu_tsc, 1); |
1595 | 1572 | ||
1596 | for (i = next; i < MCE_LOG_LEN; i++) { | 1573 | for (i = next; i < MCE_LOG_LEN; i++) { |
1597 | if (mcelog.entry[i].finished && | 1574 | struct mce *m = &mcelog.entry[i]; |
1598 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | 1575 | |
1599 | err |= copy_to_user(buf, mcelog.entry+i, | 1576 | if (m->finished && m->tsc < cpu_tsc[m->cpu]) { |
1600 | sizeof(struct mce)); | 1577 | err |= copy_to_user(buf, m, sizeof(*m)); |
1601 | smp_rmb(); | 1578 | smp_rmb(); |
1602 | buf += sizeof(struct mce); | 1579 | buf += sizeof(*m); |
1603 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1580 | memset(m, 0, sizeof(*m)); |
1604 | } | 1581 | } |
1605 | } | 1582 | } |
1606 | 1583 | ||
@@ -1608,15 +1585,15 @@ timeout: | |||
1608 | err = -EFAULT; | 1585 | err = -EFAULT; |
1609 | 1586 | ||
1610 | out: | 1587 | out: |
1611 | mutex_unlock(&mce_read_mutex); | 1588 | mutex_unlock(&mce_chrdev_read_mutex); |
1612 | kfree(cpu_tsc); | 1589 | kfree(cpu_tsc); |
1613 | 1590 | ||
1614 | return err ? err : buf - ubuf; | 1591 | return err ? err : buf - ubuf; |
1615 | } | 1592 | } |
1616 | 1593 | ||
1617 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1594 | static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) |
1618 | { | 1595 | { |
1619 | poll_wait(file, &mce_wait, wait); | 1596 | poll_wait(file, &mce_chrdev_wait, wait); |
1620 | if (rcu_access_index(mcelog.next)) | 1597 | if (rcu_access_index(mcelog.next)) |
1621 | return POLLIN | POLLRDNORM; | 1598 | return POLLIN | POLLRDNORM; |
1622 | if (!mce_apei_read_done && apei_check_mce()) | 1599 | if (!mce_apei_read_done && apei_check_mce()) |
@@ -1624,7 +1601,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
1624 | return 0; | 1601 | return 0; |
1625 | } | 1602 | } |
1626 | 1603 | ||
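mce_chrdev_poll() reports POLLIN once mcelog.next is non-zero or APEI has a pending record, so user space can sleep in poll(2) instead of busy-reading. A sketch of the consumer loop follows; it assumes struct mce and the MCE_GET_RECORD_LEN/MCE_GET_LOG_LEN ioctls are exported by <asm/mce.h> on a kernel of this vintage, and that the driver still only accepts reads large enough for the whole ring.

/* Minimal /dev/mcelog consumer: sleep in poll() until records arrive,
 * then drain the ring with one large read().  Record size and ring
 * length come from the driver's ioctls (assumed to exist as above). */
#include <asm/mce.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY);
	int recordlen, loglen;
	char *buf;

	if (fd < 0 || ioctl(fd, MCE_GET_RECORD_LEN, &recordlen) < 0 ||
	    ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0) {
		perror("mcelog");
		return 1;
	}
	buf = malloc((size_t)recordlen * loglen);
	if (!buf)
		return 1;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		ssize_t len;
		int i;

		if (poll(&pfd, 1, -1) < 0)
			break;
		len = read(fd, buf, (size_t)recordlen * loglen);
		if (len < 0)
			break;
		for (i = 0; i < len / recordlen; i++) {
			struct mce *m = (struct mce *)(buf + i * recordlen);

			printf("cpu %d bank %d status %#llx\n",
			       m->cpu, m->bank, (unsigned long long)m->status);
		}
	}
	free(buf);
	close(fd);
	return 0;
}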
1627 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | 1604 | static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, |
1605 | unsigned long arg) | ||
1628 | { | 1606 | { |
1629 | int __user *p = (int __user *)arg; | 1607 | int __user *p = (int __user *)arg; |
1630 | 1608 | ||
@@ -1652,16 +1630,16 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | |||
1652 | 1630 | ||
1653 | /* Modified in mce-inject.c, so not static or const */ | 1631 | /* Modified in mce-inject.c, so not static or const */ |
1654 | struct file_operations mce_chrdev_ops = { | 1632 | struct file_operations mce_chrdev_ops = { |
1655 | .open = mce_open, | 1633 | .open = mce_chrdev_open, |
1656 | .release = mce_release, | 1634 | .release = mce_chrdev_release, |
1657 | .read = mce_read, | 1635 | .read = mce_chrdev_read, |
1658 | .poll = mce_poll, | 1636 | .poll = mce_chrdev_poll, |
1659 | .unlocked_ioctl = mce_ioctl, | 1637 | .unlocked_ioctl = mce_chrdev_ioctl, |
1660 | .llseek = no_llseek, | 1638 | .llseek = no_llseek, |
1661 | }; | 1639 | }; |
1662 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | 1640 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); |
1663 | 1641 | ||
1664 | static struct miscdevice mce_log_device = { | 1642 | static struct miscdevice mce_chrdev_device = { |
1665 | MISC_MCELOG_MINOR, | 1643 | MISC_MCELOG_MINOR, |
1666 | "mcelog", | 1644 | "mcelog", |
1667 | &mce_chrdev_ops, | 1645 | &mce_chrdev_ops, |
@@ -1719,7 +1697,7 @@ int __init mcheck_init(void) | |||
1719 | } | 1697 | } |
1720 | 1698 | ||
1721 | /* | 1699 | /* |
1722 | * Sysfs support | 1700 | * mce_syscore: PM support |
1723 | */ | 1701 | */ |
1724 | 1702 | ||
1725 | /* | 1703 | /* |
@@ -1739,12 +1717,12 @@ static int mce_disable_error_reporting(void) | |||
1739 | return 0; | 1717 | return 0; |
1740 | } | 1718 | } |
1741 | 1719 | ||
1742 | static int mce_suspend(void) | 1720 | static int mce_syscore_suspend(void) |
1743 | { | 1721 | { |
1744 | return mce_disable_error_reporting(); | 1722 | return mce_disable_error_reporting(); |
1745 | } | 1723 | } |
1746 | 1724 | ||
1747 | static void mce_shutdown(void) | 1725 | static void mce_syscore_shutdown(void) |
1748 | { | 1726 | { |
1749 | mce_disable_error_reporting(); | 1727 | mce_disable_error_reporting(); |
1750 | } | 1728 | } |
@@ -1754,18 +1732,22 @@ static void mce_shutdown(void) | |||
1754 | * Only one CPU is active at this time, the others get re-added later using | 1732 | * Only one CPU is active at this time, the others get re-added later using |
1755 | * CPU hotplug: | 1733 | * CPU hotplug: |
1756 | */ | 1734 | */ |
1757 | static void mce_resume(void) | 1735 | static void mce_syscore_resume(void) |
1758 | { | 1736 | { |
1759 | __mcheck_cpu_init_generic(); | 1737 | __mcheck_cpu_init_generic(); |
1760 | __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); | 1738 | __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); |
1761 | } | 1739 | } |
1762 | 1740 | ||
1763 | static struct syscore_ops mce_syscore_ops = { | 1741 | static struct syscore_ops mce_syscore_ops = { |
1764 | .suspend = mce_suspend, | 1742 | .suspend = mce_syscore_suspend, |
1765 | .shutdown = mce_shutdown, | 1743 | .shutdown = mce_syscore_shutdown, |
1766 | .resume = mce_resume, | 1744 | .resume = mce_syscore_resume, |
1767 | }; | 1745 | }; |
1768 | 1746 | ||
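The suspend/shutdown/resume callbacks are only renamed here, but the grouping makes the registration pattern easier to see: one global struct syscore_ops, registered once, with no device-model involvement. A hedged kernel-style sketch of the same pattern for a hypothetical "foo" subsystem (all names invented for illustration):

/* Sketch of the mce_syscore_ops pattern: global callbacks registered
 * once at init time via register_syscore_ops(). */
#include <linux/module.h>
#include <linux/syscore_ops.h>

static int foo_suspend(void)	{ /* quiesce hardware */ return 0; }
static void foo_resume(void)	{ /* reprogram hardware */ }
static void foo_shutdown(void)	{ /* like suspend, but no return value */ }

static struct syscore_ops foo_syscore_ops = {
	.suspend	= foo_suspend,
	.resume		= foo_resume,
	.shutdown	= foo_shutdown,
};

static int __init foo_init(void)
{
	register_syscore_ops(&foo_syscore_ops);
	return 0;
}

static void __exit foo_exit(void)
{
	unregister_syscore_ops(&foo_syscore_ops);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");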
1747 | /* | ||
1748 | * mce_sysdev: Sysfs support | ||
1749 | */ | ||
1750 | |||
1769 | static void mce_cpu_restart(void *data) | 1751 | static void mce_cpu_restart(void *data) |
1770 | { | 1752 | { |
1771 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1753 | del_timer_sync(&__get_cpu_var(mce_timer)); |
@@ -1801,11 +1783,11 @@ static void mce_enable_ce(void *all) | |||
1801 | __mcheck_cpu_init_timer(); | 1783 | __mcheck_cpu_init_timer(); |
1802 | } | 1784 | } |
1803 | 1785 | ||
1804 | static struct sysdev_class mce_sysclass = { | 1786 | static struct sysdev_class mce_sysdev_class = { |
1805 | .name = "machinecheck", | 1787 | .name = "machinecheck", |
1806 | }; | 1788 | }; |
1807 | 1789 | ||
1808 | DEFINE_PER_CPU(struct sys_device, mce_dev); | 1790 | DEFINE_PER_CPU(struct sys_device, mce_sysdev); |
1809 | 1791 | ||
1810 | __cpuinitdata | 1792 | __cpuinitdata |
1811 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1793 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
@@ -1934,7 +1916,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = { | |||
1934 | &mce_cmci_disabled | 1916 | &mce_cmci_disabled |
1935 | }; | 1917 | }; |
1936 | 1918 | ||
1937 | static struct sysdev_attribute *mce_attrs[] = { | 1919 | static struct sysdev_attribute *mce_sysdev_attrs[] = { |
1938 | &attr_tolerant.attr, | 1920 | &attr_tolerant.attr, |
1939 | &attr_check_interval.attr, | 1921 | &attr_check_interval.attr, |
1940 | &attr_trigger, | 1922 | &attr_trigger, |
@@ -1945,66 +1927,67 @@ static struct sysdev_attribute *mce_attrs[] = { | |||
1945 | NULL | 1927 | NULL |
1946 | }; | 1928 | }; |
1947 | 1929 | ||
1948 | static cpumask_var_t mce_dev_initialized; | 1930 | static cpumask_var_t mce_sysdev_initialized; |
1949 | 1931 | ||
1950 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | 1932 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ |
1951 | static __cpuinit int mce_create_device(unsigned int cpu) | 1933 | static __cpuinit int mce_sysdev_create(unsigned int cpu) |
1952 | { | 1934 | { |
1935 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | ||
1953 | int err; | 1936 | int err; |
1954 | int i, j; | 1937 | int i, j; |
1955 | 1938 | ||
1956 | if (!mce_available(&boot_cpu_data)) | 1939 | if (!mce_available(&boot_cpu_data)) |
1957 | return -EIO; | 1940 | return -EIO; |
1958 | 1941 | ||
1959 | memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); | 1942 | memset(&sysdev->kobj, 0, sizeof(struct kobject)); |
1960 | per_cpu(mce_dev, cpu).id = cpu; | 1943 | sysdev->id = cpu; |
1961 | per_cpu(mce_dev, cpu).cls = &mce_sysclass; | 1944 | sysdev->cls = &mce_sysdev_class; |
1962 | 1945 | ||
1963 | err = sysdev_register(&per_cpu(mce_dev, cpu)); | 1946 | err = sysdev_register(sysdev); |
1964 | if (err) | 1947 | if (err) |
1965 | return err; | 1948 | return err; |
1966 | 1949 | ||
1967 | for (i = 0; mce_attrs[i]; i++) { | 1950 | for (i = 0; mce_sysdev_attrs[i]; i++) { |
1968 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1951 | err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); |
1969 | if (err) | 1952 | if (err) |
1970 | goto error; | 1953 | goto error; |
1971 | } | 1954 | } |
1972 | for (j = 0; j < banks; j++) { | 1955 | for (j = 0; j < banks; j++) { |
1973 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1956 | err = sysdev_create_file(sysdev, &mce_banks[j].attr); |
1974 | &mce_banks[j].attr); | ||
1975 | if (err) | 1957 | if (err) |
1976 | goto error2; | 1958 | goto error2; |
1977 | } | 1959 | } |
1978 | cpumask_set_cpu(cpu, mce_dev_initialized); | 1960 | cpumask_set_cpu(cpu, mce_sysdev_initialized); |
1979 | 1961 | ||
1980 | return 0; | 1962 | return 0; |
1981 | error2: | 1963 | error2: |
1982 | while (--j >= 0) | 1964 | while (--j >= 0) |
1983 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); | 1965 | sysdev_remove_file(sysdev, &mce_banks[j].attr); |
1984 | error: | 1966 | error: |
1985 | while (--i >= 0) | 1967 | while (--i >= 0) |
1986 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1968 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); |
1987 | 1969 | ||
1988 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1970 | sysdev_unregister(sysdev); |
1989 | 1971 | ||
1990 | return err; | 1972 | return err; |
1991 | } | 1973 | } |
1992 | 1974 | ||
1993 | static __cpuinit void mce_remove_device(unsigned int cpu) | 1975 | static __cpuinit void mce_sysdev_remove(unsigned int cpu) |
1994 | { | 1976 | { |
1977 | struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); | ||
1995 | int i; | 1978 | int i; |
1996 | 1979 | ||
1997 | if (!cpumask_test_cpu(cpu, mce_dev_initialized)) | 1980 | if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) |
1998 | return; | 1981 | return; |
1999 | 1982 | ||
2000 | for (i = 0; mce_attrs[i]; i++) | 1983 | for (i = 0; mce_sysdev_attrs[i]; i++) |
2001 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1984 | sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); |
2002 | 1985 | ||
2003 | for (i = 0; i < banks; i++) | 1986 | for (i = 0; i < banks; i++) |
2004 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); | 1987 | sysdev_remove_file(sysdev, &mce_banks[i].attr); |
2005 | 1988 | ||
2006 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1989 | sysdev_unregister(sysdev); |
2007 | cpumask_clear_cpu(cpu, mce_dev_initialized); | 1990 | cpumask_clear_cpu(cpu, mce_sysdev_initialized); |
2008 | } | 1991 | } |
2009 | 1992 | ||
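mce_sysdev_create() keeps the usual unwind idiom: each successfully created attribute or bank file is removed in reverse order when a later step fails, before the sysdev itself is unregistered. A small stand-alone illustration of that goto-unwind shape (the resource type and names are invented for the example):

/* Same error-unwind shape as mce_sysdev_create(): create N items, and
 * on failure tear down only the ones already created, in reverse. */
#include <stdio.h>
#include <stdlib.h>

#define NITEMS 4

static int create_item(int i)	{ return (i == 2) ? -1 : 0; }	/* fail on #2 */
static void remove_item(int i)	{ printf("removed %d\n", i); }

static int create_all(void)
{
	int i, err = 0;

	for (i = 0; i < NITEMS; i++) {
		err = create_item(i);
		if (err)
			goto error;
		printf("created %d\n", i);
	}
	return 0;
error:
	while (--i >= 0)
		remove_item(i);
	return err;
}

int main(void)
{
	return create_all() ? EXIT_FAILURE : EXIT_SUCCESS;
}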
2010 | /* Make sure there are no machine checks on offlined CPUs. */ | 1993 | /* Make sure there are no machine checks on offlined CPUs. */ |
@@ -2054,7 +2037,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2054 | switch (action) { | 2037 | switch (action) { |
2055 | case CPU_ONLINE: | 2038 | case CPU_ONLINE: |
2056 | case CPU_ONLINE_FROZEN: | 2039 | case CPU_ONLINE_FROZEN: |
2057 | mce_create_device(cpu); | 2040 | mce_sysdev_create(cpu); |
2058 | if (threshold_cpu_callback) | 2041 | if (threshold_cpu_callback) |
2059 | threshold_cpu_callback(action, cpu); | 2042 | threshold_cpu_callback(action, cpu); |
2060 | break; | 2043 | break; |
@@ -2062,7 +2045,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2062 | case CPU_DEAD_FROZEN: | 2045 | case CPU_DEAD_FROZEN: |
2063 | if (threshold_cpu_callback) | 2046 | if (threshold_cpu_callback) |
2064 | threshold_cpu_callback(action, cpu); | 2047 | threshold_cpu_callback(action, cpu); |
2065 | mce_remove_device(cpu); | 2048 | mce_sysdev_remove(cpu); |
2066 | break; | 2049 | break; |
2067 | case CPU_DOWN_PREPARE: | 2050 | case CPU_DOWN_PREPARE: |
2068 | case CPU_DOWN_PREPARE_FROZEN: | 2051 | case CPU_DOWN_PREPARE_FROZEN: |
@@ -2116,27 +2099,28 @@ static __init int mcheck_init_device(void) | |||
2116 | if (!mce_available(&boot_cpu_data)) | 2099 | if (!mce_available(&boot_cpu_data)) |
2117 | return -EIO; | 2100 | return -EIO; |
2118 | 2101 | ||
2119 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 2102 | zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); |
2120 | 2103 | ||
2121 | mce_init_banks(); | 2104 | mce_init_banks(); |
2122 | 2105 | ||
2123 | err = sysdev_class_register(&mce_sysclass); | 2106 | err = sysdev_class_register(&mce_sysdev_class); |
2124 | if (err) | 2107 | if (err) |
2125 | return err; | 2108 | return err; |
2126 | 2109 | ||
2127 | for_each_online_cpu(i) { | 2110 | for_each_online_cpu(i) { |
2128 | err = mce_create_device(i); | 2111 | err = mce_sysdev_create(i); |
2129 | if (err) | 2112 | if (err) |
2130 | return err; | 2113 | return err; |
2131 | } | 2114 | } |
2132 | 2115 | ||
2133 | register_syscore_ops(&mce_syscore_ops); | 2116 | register_syscore_ops(&mce_syscore_ops); |
2134 | register_hotcpu_notifier(&mce_cpu_notifier); | 2117 | register_hotcpu_notifier(&mce_cpu_notifier); |
2135 | misc_register(&mce_log_device); | 2118 | |
2119 | /* register character device /dev/mcelog */ | ||
2120 | misc_register(&mce_chrdev_device); | ||
2136 | 2121 | ||
2137 | return err; | 2122 | return err; |
2138 | } | 2123 | } |
2139 | |||
2140 | device_initcall(mcheck_init_device); | 2124 | device_initcall(mcheck_init_device); |
2141 | 2125 | ||
2142 | /* | 2126 | /* |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index bb0adad35143..f5474218cffe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
548 | if (!b) | 548 | if (!b) |
549 | goto out; | 549 | goto out; |
550 | 550 | ||
551 | err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, | 551 | err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, |
552 | b->kobj, name); | 552 | b->kobj, name); |
553 | if (err) | 553 | if (err) |
554 | goto out; | 554 | goto out; |
@@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
571 | goto out; | 571 | goto out; |
572 | } | 572 | } |
573 | 573 | ||
574 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); | 574 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); |
575 | if (!b->kobj) | 575 | if (!b->kobj) |
576 | goto out_free; | 576 | goto out_free; |
577 | 577 | ||
@@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
591 | if (i == cpu) | 591 | if (i == cpu) |
592 | continue; | 592 | continue; |
593 | 593 | ||
594 | err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, | 594 | err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, |
595 | b->kobj, name); | 595 | b->kobj, name); |
596 | if (err) | 596 | if (err) |
597 | goto out; | 597 | goto out; |
@@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
669 | #ifdef CONFIG_SMP | 669 | #ifdef CONFIG_SMP |
670 | /* sibling symlink */ | 670 | /* sibling symlink */ |
671 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 671 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
672 | sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); | 672 | sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); |
673 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 673 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
674 | 674 | ||
675 | return; | 675 | return; |
@@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
681 | if (i == cpu) | 681 | if (i == cpu) |
682 | continue; | 682 | continue; |
683 | 683 | ||
684 | sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); | 684 | sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); |
685 | per_cpu(threshold_banks, i)[bank] = NULL; | 685 | per_cpu(threshold_banks, i)[bank] = NULL; |
686 | } | 686 | } |
687 | 687 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 929739a653d1..08119a37e53c 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -79,7 +79,6 @@ void set_mtrr_ops(const struct mtrr_ops *ops) | |||
79 | static int have_wrcomb(void) | 79 | static int have_wrcomb(void) |
80 | { | 80 | { |
81 | struct pci_dev *dev; | 81 | struct pci_dev *dev; |
82 | u8 rev; | ||
83 | 82 | ||
84 | dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); | 83 | dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); |
85 | if (dev != NULL) { | 84 | if (dev != NULL) { |
@@ -89,13 +88,11 @@ static int have_wrcomb(void) | |||
89 | * chipsets to be tagged | 88 | * chipsets to be tagged |
90 | */ | 89 | */ |
91 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && | 90 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && |
92 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { | 91 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && |
93 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 92 | dev->revision <= 5) { |
94 | if (rev <= 5) { | 93 | pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); |
95 | pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); | 94 | pci_dev_put(dev); |
96 | pci_dev_put(dev); | 95 | return 0; |
97 | return 0; | ||
98 | } | ||
99 | } | 96 | } |
100 | /* | 97 | /* |
101 | * Intel 450NX errata # 23. Non ascending cacheline evictions to | 98 | * Intel 450NX errata # 23. Non ascending cacheline evictions to |
@@ -137,55 +134,43 @@ static void __init init_table(void) | |||
137 | } | 134 | } |
138 | 135 | ||
139 | struct set_mtrr_data { | 136 | struct set_mtrr_data { |
140 | atomic_t count; | ||
141 | atomic_t gate; | ||
142 | unsigned long smp_base; | 137 | unsigned long smp_base; |
143 | unsigned long smp_size; | 138 | unsigned long smp_size; |
144 | unsigned int smp_reg; | 139 | unsigned int smp_reg; |
145 | mtrr_type smp_type; | 140 | mtrr_type smp_type; |
146 | }; | 141 | }; |
147 | 142 | ||
148 | static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); | ||
149 | |||
150 | /** | 143 | /** |
151 | * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. | 144 | * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed |
145 | * by all the CPUs. | ||
152 | * @info: pointer to mtrr configuration data | 146 | * @info: pointer to mtrr configuration data |
153 | * | 147 | * |
154 | * Returns nothing. | 148 | * Returns nothing. |
155 | */ | 149 | */ |
156 | static int mtrr_work_handler(void *info) | 150 | static int mtrr_rendezvous_handler(void *info) |
157 | { | 151 | { |
158 | #ifdef CONFIG_SMP | 152 | #ifdef CONFIG_SMP |
159 | struct set_mtrr_data *data = info; | 153 | struct set_mtrr_data *data = info; |
160 | unsigned long flags; | ||
161 | |||
162 | atomic_dec(&data->count); | ||
163 | while (!atomic_read(&data->gate)) | ||
164 | cpu_relax(); | ||
165 | |||
166 | local_irq_save(flags); | ||
167 | |||
168 | atomic_dec(&data->count); | ||
169 | while (atomic_read(&data->gate)) | ||
170 | cpu_relax(); | ||
171 | 154 | ||
172 | /* The master has cleared me to execute */ | 155 | /* |
156 | * We use this same function to initialize the mtrrs during boot, | ||
157 | * resume, runtime cpu online and on an explicit request to set a | ||
158 | * specific MTRR. | ||
159 | * | ||
160 | * During boot or suspend, the state of the boot cpu's mtrrs has been | ||
161 | * saved, and we want to replicate that across all the cpus that come | ||
162 | * online (either at the end of boot or resume or during a runtime cpu | ||
163 | * online). If we're doing that, @reg is set to something special and on | ||
164 | * all the cpu's we do mtrr_if->set_all() (On the logical cpu that | ||
165 | * started the boot/resume sequence, this might be a duplicate | ||
166 | * set_all()). | ||
167 | */ | ||
173 | if (data->smp_reg != ~0U) { | 168 | if (data->smp_reg != ~0U) { |
174 | mtrr_if->set(data->smp_reg, data->smp_base, | 169 | mtrr_if->set(data->smp_reg, data->smp_base, |
175 | data->smp_size, data->smp_type); | 170 | data->smp_size, data->smp_type); |
176 | } else if (mtrr_aps_delayed_init) { | 171 | } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) { |
177 | /* | ||
178 | * Initialize the MTRRs inaddition to the synchronisation. | ||
179 | */ | ||
180 | mtrr_if->set_all(); | 172 | mtrr_if->set_all(); |
181 | } | 173 | } |
182 | |||
183 | atomic_dec(&data->count); | ||
184 | while (!atomic_read(&data->gate)) | ||
185 | cpu_relax(); | ||
186 | |||
187 | atomic_dec(&data->count); | ||
188 | local_irq_restore(flags); | ||
189 | #endif | 174 | #endif |
190 | return 0; | 175 | return 0; |
191 | } | 176 | } |
@@ -223,20 +208,11 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
223 | * 14. Wait for buddies to catch up | 208 | * 14. Wait for buddies to catch up |
224 | * 15. Enable interrupts. | 209 | * 15. Enable interrupts. |
225 | * | 210 | * |
226 | * What does that mean for us? Well, first we set data.count to the number | 211 | * What does that mean for us? Well, stop_machine() will ensure that |
227 | * of CPUs. As each CPU announces that it started the rendezvous handler by | 212 | * the rendezvous handler is started on each CPU. And in lockstep they |
228 | * decrementing the count, We reset data.count and set the data.gate flag | 213 | * do the state transition of disabling interrupts, updating MTRR's |
229 | * allowing all the cpu's to proceed with the work. As each cpu disables | 214 | * (the CPU vendors may each do it differently, so we call mtrr_if->set() |
230 | * interrupts, it'll decrement data.count once. We wait until it hits 0 and | 215 | * callback and let them take care of it.) and enabling interrupts. |
231 | * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they | ||
232 | * are waiting for that flag to be cleared. Once it's cleared, each | ||
233 | * CPU goes through the transition of updating MTRRs. | ||
234 | * The CPU vendors may each do it differently, | ||
235 | * so we call mtrr_if->set() callback and let them take care of it. | ||
236 | * When they're done, they again decrement data->count and wait for data.gate | ||
237 | * to be set. | ||
238 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | ||
239 | * Everyone then enables interrupts and we all continue on. | ||
240 | * | 216 | * |
241 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff | 217 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff |
242 | * becomes nops. | 218 | * becomes nops. |
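stop_machine() is what now gives set_mtrr() the "every CPU runs the handler in lockstep, with interrupts off" guarantee that the removed atomics-and-gate dance provided by hand. A user-space analogue of that rendezvous using a pthread barrier, purely to illustrate the lockstep shape (it obviously cannot disable interrupts); build with -lpthread:

/* User-space analogue of the MTRR rendezvous: every thread meets at a
 * barrier, runs the same handler, then meets again before continuing. */
#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

static pthread_barrier_t gate;

static void rendezvous_handler(long cpu)
{
	/* in the kernel this is mtrr_rendezvous_handler() with IRQs off */
	printf("cpu %ld: updating MTRRs\n", cpu);
}

static void *cpu_thread(void *arg)
{
	long cpu = (long)arg;

	pthread_barrier_wait(&gate);	/* everyone enters together     */
	rendezvous_handler(cpu);
	pthread_barrier_wait(&gate);	/* nobody leaves until all done */
	return NULL;
}

int main(void)
{
	pthread_t tid[NCPUS];
	long i;

	pthread_barrier_init(&gate, NULL, NCPUS);
	for (i = 0; i < NCPUS; i++)
		pthread_create(&tid[i], NULL, cpu_thread, (void *)i);
	for (i = 0; i < NCPUS; i++)
		pthread_join(tid[i], NULL);
	pthread_barrier_destroy(&gate);
	return 0;
}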
@@ -244,92 +220,26 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
244 | static void | 220 | static void |
245 | set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) | 221 | set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) |
246 | { | 222 | { |
247 | struct set_mtrr_data data; | 223 | struct set_mtrr_data data = { .smp_reg = reg, |
248 | unsigned long flags; | 224 | .smp_base = base, |
249 | int cpu; | 225 | .smp_size = size, |
250 | 226 | .smp_type = type | |
251 | preempt_disable(); | 227 | }; |
252 | |||
253 | data.smp_reg = reg; | ||
254 | data.smp_base = base; | ||
255 | data.smp_size = size; | ||
256 | data.smp_type = type; | ||
257 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
258 | |||
259 | /* Make sure data.count is visible before unleashing other CPUs */ | ||
260 | smp_wmb(); | ||
261 | atomic_set(&data.gate, 0); | ||
262 | |||
263 | /* Start the ball rolling on other CPUs */ | ||
264 | for_each_online_cpu(cpu) { | ||
265 | struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); | ||
266 | |||
267 | if (cpu == smp_processor_id()) | ||
268 | continue; | ||
269 | |||
270 | stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); | ||
271 | } | ||
272 | |||
273 | |||
274 | while (atomic_read(&data.count)) | ||
275 | cpu_relax(); | ||
276 | |||
277 | /* Ok, reset count and toggle gate */ | ||
278 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
279 | smp_wmb(); | ||
280 | atomic_set(&data.gate, 1); | ||
281 | |||
282 | local_irq_save(flags); | ||
283 | |||
284 | while (atomic_read(&data.count)) | ||
285 | cpu_relax(); | ||
286 | |||
287 | /* Ok, reset count and toggle gate */ | ||
288 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
289 | smp_wmb(); | ||
290 | atomic_set(&data.gate, 0); | ||
291 | |||
292 | /* Do our MTRR business */ | ||
293 | |||
294 | /* | ||
295 | * HACK! | ||
296 | * | ||
297 | * We use this same function to initialize the mtrrs during boot, | ||
298 | * resume, runtime cpu online and on an explicit request to set a | ||
299 | * specific MTRR. | ||
300 | * | ||
301 | * During boot or suspend, the state of the boot cpu's mtrrs has been | ||
302 | * saved, and we want to replicate that across all the cpus that come | ||
303 | * online (either at the end of boot or resume or during a runtime cpu | ||
304 | * online). If we're doing that, @reg is set to something special and on | ||
305 | * this cpu we still do mtrr_if->set_all(). During boot/resume, this | ||
306 | * is unnecessary if at this point we are still on the cpu that started | ||
307 | * the boot/resume sequence. But there is no guarantee that we are still | ||
308 | * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be | ||
309 | * sure that we are in sync with everyone else. | ||
310 | */ | ||
311 | if (reg != ~0U) | ||
312 | mtrr_if->set(reg, base, size, type); | ||
313 | else | ||
314 | mtrr_if->set_all(); | ||
315 | 228 | ||
316 | /* Wait for the others */ | 229 | stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); |
317 | while (atomic_read(&data.count)) | 230 | } |
318 | cpu_relax(); | ||
319 | |||
320 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
321 | smp_wmb(); | ||
322 | atomic_set(&data.gate, 1); | ||
323 | |||
324 | /* | ||
325 | * Wait here for everyone to have seen the gate change | ||
326 | * So we're the last ones to touch 'data' | ||
327 | */ | ||
328 | while (atomic_read(&data.count)) | ||
329 | cpu_relax(); | ||
330 | 231 | ||
331 | local_irq_restore(flags); | 232 | static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base, |
332 | preempt_enable(); | 233 | unsigned long size, mtrr_type type) |
234 | { | ||
235 | struct set_mtrr_data data = { .smp_reg = reg, | ||
236 | .smp_base = base, | ||
237 | .smp_size = size, | ||
238 | .smp_type = type | ||
239 | }; | ||
240 | |||
241 | stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data, | ||
242 | cpu_callout_mask); | ||
333 | } | 243 | } |
334 | 244 | ||
335 | /** | 245 | /** |
@@ -783,7 +693,7 @@ void mtrr_ap_init(void) | |||
783 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | 693 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug |
784 | * lock to prevent mtrr entry changes | 694 | * lock to prevent mtrr entry changes |
785 | */ | 695 | */ |
786 | set_mtrr(~0U, 0, 0, 0); | 696 | set_mtrr_from_inactive_cpu(~0U, 0, 0, 0); |
787 | } | 697 | } |
788 | 698 | ||
789 | /** | 699 | /** |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3a0338b4b179..4ee3abf20ed6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/highmem.h> | ||
26 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
28 | 27 | ||
@@ -45,38 +44,27 @@ do { \ | |||
45 | #endif | 44 | #endif |
46 | 45 | ||
47 | /* | 46 | /* |
48 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | 47 | * | NHM/WSM | SNB | |
48 | * register ------------------------------- | ||
49 | * | HT | no HT | HT | no HT | | ||
50 | *----------------------------------------- | ||
51 | * offcore | core | core | cpu | core | | ||
52 | * lbr_sel | core | core | cpu | core | | ||
53 | * ld_lat | cpu | core | cpu | core | | ||
54 | *----------------------------------------- | ||
55 | * | ||
56 | * Given that there is a small number of shared regs, | ||
57 | * we can pre-allocate their slot in the per-cpu | ||
58 | * per-core reg tables. | ||
49 | */ | 59 | */ |
50 | static unsigned long | 60 | enum extra_reg_type { |
51 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | 61 | EXTRA_REG_NONE = -1, /* not used */ |
52 | { | ||
53 | unsigned long offset, addr = (unsigned long)from; | ||
54 | unsigned long size, len = 0; | ||
55 | struct page *page; | ||
56 | void *map; | ||
57 | int ret; | ||
58 | |||
59 | do { | ||
60 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
61 | if (!ret) | ||
62 | break; | ||
63 | |||
64 | offset = addr & (PAGE_SIZE - 1); | ||
65 | size = min(PAGE_SIZE - offset, n - len); | ||
66 | |||
67 | map = kmap_atomic(page); | ||
68 | memcpy(to, map+offset, size); | ||
69 | kunmap_atomic(map); | ||
70 | put_page(page); | ||
71 | 62 | ||
72 | len += size; | 63 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
73 | to += size; | 64 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
74 | addr += size; | ||
75 | 65 | ||
76 | } while (len < n); | 66 | EXTRA_REG_MAX /* number of entries needed */ |
77 | 67 | }; | |
78 | return len; | ||
79 | } | ||
80 | 68 | ||
81 | struct event_constraint { | 69 | struct event_constraint { |
82 | union { | 70 | union { |
@@ -132,11 +120,10 @@ struct cpu_hw_events { | |||
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 120 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
133 | 121 | ||
134 | /* | 122 | /* |
135 | * Intel percore register state. | 123 | * manage shared (per-core, per-cpu) registers |
136 | * Coordinate shared resources between HT threads. | 124 | * used on Intel NHM/WSM/SNB |
137 | */ | 125 | */ |
138 | int percore_used; /* Used by this CPU? */ | 126 | struct intel_shared_regs *shared_regs; |
139 | struct intel_percore *per_core; | ||
140 | 127 | ||
141 | /* | 128 | /* |
142 | * AMD specific bits | 129 | * AMD specific bits |
@@ -187,26 +174,45 @@ struct cpu_hw_events { | |||
187 | for ((e) = (c); (e)->weight; (e)++) | 174 | for ((e) = (c); (e)->weight; (e)++) |
188 | 175 | ||
189 | /* | 176 | /* |
177 | * Per register state. | ||
178 | */ | ||
179 | struct er_account { | ||
180 | raw_spinlock_t lock; /* per-core: protect structure */ | ||
181 | u64 config; /* extra MSR config */ | ||
182 | u64 reg; /* extra MSR number */ | ||
183 | atomic_t ref; /* reference count */ | ||
184 | }; | ||
185 | |||
186 | /* | ||
190 | * Extra registers for specific events. | 187 | * Extra registers for specific events. |
188 | * | ||
191 | * Some events need large masks and require external MSRs. | 189 | * Some events need large masks and require external MSRs. |
192 | * Define a mapping to these extra registers. | 190 | * Those extra MSRs end up being shared for all events on |
191 | * a PMU and sometimes between PMU of sibling HT threads. | ||
192 | * In either case, the kernel needs to handle conflicting | ||
193 | * accesses to those extra, shared, regs. The data structure | ||
194 | * to manage those registers is stored in cpu_hw_event. | ||
193 | */ | 195 | */ |
194 | struct extra_reg { | 196 | struct extra_reg { |
195 | unsigned int event; | 197 | unsigned int event; |
196 | unsigned int msr; | 198 | unsigned int msr; |
197 | u64 config_mask; | 199 | u64 config_mask; |
198 | u64 valid_mask; | 200 | u64 valid_mask; |
201 | int idx; /* per_xxx->regs[] reg index */ | ||
199 | }; | 202 | }; |
200 | 203 | ||
201 | #define EVENT_EXTRA_REG(e, ms, m, vm) { \ | 204 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ |
202 | .event = (e), \ | 205 | .event = (e), \ |
203 | .msr = (ms), \ | 206 | .msr = (ms), \ |
204 | .config_mask = (m), \ | 207 | .config_mask = (m), \ |
205 | .valid_mask = (vm), \ | 208 | .valid_mask = (vm), \ |
209 | .idx = EXTRA_REG_##i \ | ||
206 | } | 210 | } |
207 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ | 211 | |
208 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) | 212 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ |
209 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) | 213 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) |
214 | |||
215 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) | ||
210 | 216 | ||
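With the extra_reg table, an offcore-response event carries its MSR payload in attr.config1; the kernel copies it into hw.extra_reg and later programs MSR_OFFCORE_RSP_0/1. From user space that looks roughly like the sketch below. The raw event code 0x01b7 matches the tables in this patch; the request/response bits placed in config1 are a placeholder and would have to come from the SDM for the target CPU.

/* Sketch: open a raw OFFCORE_RESPONSE event.  config selects event
 * 0xb7/umask 0x01; config1 carries the bits destined for the shared
 * MSR_OFFCORE_RSP_* register managed by the code in this patch. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size	= sizeof(attr);
	attr.type	= PERF_TYPE_RAW;
	attr.config	= 0x01b7;	/* OFFCORE_RESPONSE_0 */
	attr.config1	= 0xffff;	/* placeholder request/response mask */
	attr.disabled	= 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("offcore responses: %lld\n", count);
	close(fd);
	return 0;
}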
211 | union perf_capabilities { | 217 | union perf_capabilities { |
212 | struct { | 218 | struct { |
@@ -252,7 +258,6 @@ struct x86_pmu { | |||
252 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 258 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
253 | struct perf_event *event); | 259 | struct perf_event *event); |
254 | struct event_constraint *event_constraints; | 260 | struct event_constraint *event_constraints; |
255 | struct event_constraint *percore_constraints; | ||
256 | void (*quirks)(void); | 261 | void (*quirks)(void); |
257 | int perfctr_second_write; | 262 | int perfctr_second_write; |
258 | 263 | ||
@@ -286,8 +291,12 @@ struct x86_pmu { | |||
286 | * Extra registers for events | 291 | * Extra registers for events |
287 | */ | 292 | */ |
288 | struct extra_reg *extra_regs; | 293 | struct extra_reg *extra_regs; |
294 | unsigned int er_flags; | ||
289 | }; | 295 | }; |
290 | 296 | ||
297 | #define ERF_NO_HT_SHARING 1 | ||
298 | #define ERF_HAS_RSP_1 2 | ||
299 | |||
291 | static struct x86_pmu x86_pmu __read_mostly; | 300 | static struct x86_pmu x86_pmu __read_mostly; |
292 | 301 | ||
293 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 302 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index) | |||
393 | */ | 402 | */ |
394 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | 403 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) |
395 | { | 404 | { |
405 | struct hw_perf_event_extra *reg; | ||
396 | struct extra_reg *er; | 406 | struct extra_reg *er; |
397 | 407 | ||
398 | event->hw.extra_reg = 0; | 408 | reg = &event->hw.extra_reg; |
399 | event->hw.extra_config = 0; | ||
400 | 409 | ||
401 | if (!x86_pmu.extra_regs) | 410 | if (!x86_pmu.extra_regs) |
402 | return 0; | 411 | return 0; |
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | |||
406 | continue; | 415 | continue; |
407 | if (event->attr.config1 & ~er->valid_mask) | 416 | if (event->attr.config1 & ~er->valid_mask) |
408 | return -EINVAL; | 417 | return -EINVAL; |
409 | event->hw.extra_reg = er->msr; | 418 | |
410 | event->hw.extra_config = event->attr.config1; | 419 | reg->idx = er->idx; |
420 | reg->config = event->attr.config1; | ||
421 | reg->reg = er->msr; | ||
411 | break; | 422 | break; |
412 | } | 423 | } |
413 | return 0; | 424 | return 0; |
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
706 | event->hw.last_cpu = -1; | 717 | event->hw.last_cpu = -1; |
707 | event->hw.last_tag = ~0ULL; | 718 | event->hw.last_tag = ~0ULL; |
708 | 719 | ||
720 | /* mark unused */ | ||
721 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
722 | |||
709 | return x86_pmu.hw_config(event); | 723 | return x86_pmu.hw_config(event); |
710 | } | 724 | } |
711 | 725 | ||
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu) | |||
747 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | 761 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
748 | u64 enable_mask) | 762 | u64 enable_mask) |
749 | { | 763 | { |
750 | if (hwc->extra_reg) | 764 | if (hwc->extra_reg.reg) |
751 | wrmsrl(hwc->extra_reg, hwc->extra_config); | 765 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); |
752 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | 766 | wrmsrl(hwc->config_base, hwc->config | enable_mask); |
753 | } | 767 | } |
754 | 768 | ||
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1332 | if (!x86_perf_event_set_period(event)) | 1346 | if (!x86_perf_event_set_period(event)) |
1333 | continue; | 1347 | continue; |
1334 | 1348 | ||
1335 | if (perf_event_overflow(event, 1, &data, regs)) | 1349 | if (perf_event_overflow(event, &data, regs)) |
1336 | x86_pmu_stop(event, 0); | 1350 | x86_pmu_stop(event, 0); |
1337 | } | 1351 | } |
1338 | 1352 | ||
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu) | |||
1637 | perf_pmu_enable(pmu); | 1651 | perf_pmu_enable(pmu); |
1638 | return 0; | 1652 | return 0; |
1639 | } | 1653 | } |
1654 | /* | ||
1655 | * a fake_cpuc is used to validate event groups. Due to | ||
1656 | * the extra reg logic, we need to also allocate a fake | ||
1657 | * per_core and per_cpu structure. Otherwise, group events | ||
1658 | * using extra reg may conflict without the kernel being | ||
1659 | * able to catch this when the last event gets added to | ||
1660 | * the group. | ||
1661 | */ | ||
1662 | static void free_fake_cpuc(struct cpu_hw_events *cpuc) | ||
1663 | { | ||
1664 | kfree(cpuc->shared_regs); | ||
1665 | kfree(cpuc); | ||
1666 | } | ||
1667 | |||
1668 | static struct cpu_hw_events *allocate_fake_cpuc(void) | ||
1669 | { | ||
1670 | struct cpu_hw_events *cpuc; | ||
1671 | int cpu = raw_smp_processor_id(); | ||
1672 | |||
1673 | cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); | ||
1674 | if (!cpuc) | ||
1675 | return ERR_PTR(-ENOMEM); | ||
1676 | |||
1677 | /* only needed, if we have extra_regs */ | ||
1678 | if (x86_pmu.extra_regs) { | ||
1679 | cpuc->shared_regs = allocate_shared_regs(cpu); | ||
1680 | if (!cpuc->shared_regs) | ||
1681 | goto error; | ||
1682 | } | ||
1683 | return cpuc; | ||
1684 | error: | ||
1685 | free_fake_cpuc(cpuc); | ||
1686 | return ERR_PTR(-ENOMEM); | ||
1687 | } | ||
1640 | 1688 | ||
1641 | /* | 1689 | /* |
1642 | * validate that we can schedule this event | 1690 | * validate that we can schedule this event |
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event) | |||
1647 | struct event_constraint *c; | 1695 | struct event_constraint *c; |
1648 | int ret = 0; | 1696 | int ret = 0; |
1649 | 1697 | ||
1650 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | 1698 | fake_cpuc = allocate_fake_cpuc(); |
1651 | if (!fake_cpuc) | 1699 | if (IS_ERR(fake_cpuc)) |
1652 | return -ENOMEM; | 1700 | return PTR_ERR(fake_cpuc); |
1653 | 1701 | ||
1654 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | 1702 | c = x86_pmu.get_event_constraints(fake_cpuc, event); |
1655 | 1703 | ||
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event) | |||
1659 | if (x86_pmu.put_event_constraints) | 1707 | if (x86_pmu.put_event_constraints) |
1660 | x86_pmu.put_event_constraints(fake_cpuc, event); | 1708 | x86_pmu.put_event_constraints(fake_cpuc, event); |
1661 | 1709 | ||
1662 | kfree(fake_cpuc); | 1710 | free_fake_cpuc(fake_cpuc); |
1663 | 1711 | ||
1664 | return ret; | 1712 | return ret; |
1665 | } | 1713 | } |
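allocate_fake_cpuc() switches the validation paths from a bare NULL check to the ERR_PTR()/IS_ERR()/PTR_ERR() convention, so callers can propagate a real errno. For reference, a stand-alone rendition of that idiom, simplified from the kernel's err.h where errno values occupy the top page of the pointer space:

/* Simplified ERR_PTR/IS_ERR/PTR_ERR, mirroring the convention used by
 * allocate_fake_cpuc() above: an errno is smuggled inside a pointer. */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *p)	{ return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *allocate_thing(int fail)
{
	static int thing;

	return fail ? ERR_PTR(-ENOMEM) : &thing;
}

int main(void)
{
	void *p = allocate_thing(1);

	if (IS_ERR(p))
		printf("allocation failed: %ld\n", PTR_ERR(p));
	return 0;
}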
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event) | |||
1679 | { | 1727 | { |
1680 | struct perf_event *leader = event->group_leader; | 1728 | struct perf_event *leader = event->group_leader; |
1681 | struct cpu_hw_events *fake_cpuc; | 1729 | struct cpu_hw_events *fake_cpuc; |
1682 | int ret, n; | 1730 | int ret = -ENOSPC, n; |
1683 | |||
1684 | ret = -ENOMEM; | ||
1685 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1686 | if (!fake_cpuc) | ||
1687 | goto out; | ||
1688 | 1731 | ||
1732 | fake_cpuc = allocate_fake_cpuc(); | ||
1733 | if (IS_ERR(fake_cpuc)) | ||
1734 | return PTR_ERR(fake_cpuc); | ||
1689 | /* | 1735 | /* |
1690 | * the event is not yet connected with its | 1736 | * the event is not yet connected with its |
1691 | * siblings therefore we must first collect | 1737 | * siblings therefore we must first collect |
1692 | * existing siblings, then add the new event | 1738 | * existing siblings, then add the new event |
1693 | * before we can simulate the scheduling | 1739 | * before we can simulate the scheduling |
1694 | */ | 1740 | */ |
1695 | ret = -ENOSPC; | ||
1696 | n = collect_events(fake_cpuc, leader, true); | 1741 | n = collect_events(fake_cpuc, leader, true); |
1697 | if (n < 0) | 1742 | if (n < 0) |
1698 | goto out_free; | 1743 | goto out; |
1699 | 1744 | ||
1700 | fake_cpuc->n_events = n; | 1745 | fake_cpuc->n_events = n; |
1701 | n = collect_events(fake_cpuc, event, false); | 1746 | n = collect_events(fake_cpuc, event, false); |
1702 | if (n < 0) | 1747 | if (n < 0) |
1703 | goto out_free; | 1748 | goto out; |
1704 | 1749 | ||
1705 | fake_cpuc->n_events = n; | 1750 | fake_cpuc->n_events = n; |
1706 | 1751 | ||
1707 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); | 1752 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1708 | 1753 | ||
1709 | out_free: | ||
1710 | kfree(fake_cpuc); | ||
1711 | out: | 1754 | out: |
1755 | free_fake_cpuc(fake_cpuc); | ||
1712 | return ret; | 1756 | return ret; |
1713 | } | 1757 | } |
1714 | 1758 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index fe29c1d2219e..941caa2e449b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids | |||
89 | [ C(RESULT_MISS) ] = -1, | 89 | [ C(RESULT_MISS) ] = -1, |
90 | }, | 90 | }, |
91 | }, | 91 | }, |
92 | [ C(NODE) ] = { | ||
93 | [ C(OP_READ) ] = { | ||
94 | [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ | ||
95 | [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ | ||
96 | }, | ||
97 | [ C(OP_WRITE) ] = { | ||
98 | [ C(RESULT_ACCESS) ] = -1, | ||
99 | [ C(RESULT_MISS) ] = -1, | ||
100 | }, | ||
101 | [ C(OP_PREFETCH) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = -1, | ||
103 | [ C(RESULT_MISS) ] = -1, | ||
104 | }, | ||
105 | }, | ||
92 | }; | 106 | }; |
93 | 107 | ||
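The new C(NODE) rows extend the generic cache-event table with local/remote node accesses (on AMD via the "CPU Request to Memory" sub-events, on Intel via offcore response). Assuming the matching PERF_COUNT_HW_CACHE_NODE enumerator is present in the perf headers for this kernel series, user space selects it with the usual cache-event encoding:

/* Sketch: count node (local+remote memory) read accesses through the
 * generic cache event added by this patch.  PERF_COUNT_HW_CACHE_NODE
 * is assumed to exist in <linux/perf_event.h> for this series. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size	= sizeof(attr);
	attr.type	= PERF_TYPE_HW_CACHE;
	attr.config	= PERF_COUNT_HW_CACHE_NODE |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... workload ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("node accesses: %lld\n", count);
	close(fd);
	return 0;
}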
94 | /* | 108 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 41178c826c48..45fbb8f7f549 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1,25 +1,15 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | 3 | /* |
15 | * Per core state | 4 | * Per core/cpu state |
16 | * This used to coordinate shared registers for HT threads. | 5 | * |
6 | * Used to coordinate shared registers between HT threads or | ||
7 | * among events on a single PMU. | ||
17 | */ | 8 | */ |
18 | struct intel_percore { | 9 | struct intel_shared_regs { |
19 | raw_spinlock_t lock; /* protect structure */ | 10 | struct er_account regs[EXTRA_REG_MAX]; |
20 | struct er_account regs[MAX_EXTRA_REGS]; | 11 | int refcnt; /* per-core: #HT threads */ |
21 | int refcnt; /* number of threads */ | 12 | unsigned core_id; /* per-core: core id */ |
22 | unsigned core_id; | ||
23 | }; | 13 | }; |
24 | 14 | ||
25 | /* | 15 | /* |
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = | |||
88 | 78 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = | 79 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
90 | { | 80 | { |
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 81 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
92 | EVENT_EXTRA_END | 82 | EVENT_EXTRA_END |
93 | }; | 83 | }; |
94 | 84 | ||
95 | static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
101 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = | 85 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
102 | { | 86 | { |
103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 87 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 100 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
117 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | 101 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ |
118 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | 102 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
119 | INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ | ||
120 | INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ | ||
121 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | 103 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
122 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | 104 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
123 | EVENT_CONSTRAINT_END | 105 | EVENT_CONSTRAINT_END |
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = | |||
125 | 107 | ||
126 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = | 108 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
127 | { | 109 | { |
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | 110 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | 111 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), |
130 | EVENT_EXTRA_END | 112 | EVENT_EXTRA_END |
131 | }; | 113 | }; |
132 | 114 | ||
133 | static struct event_constraint intel_westmere_percore_constraints[] __read_mostly = | 115 | static struct event_constraint intel_v1_event_constraints[] __read_mostly = |
134 | { | 116 | { |
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | 117 | EVENT_CONSTRAINT_END |
138 | }; | 118 | }; |
139 | 119 | ||
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = | |||
145 | EVENT_CONSTRAINT_END | 125 | EVENT_CONSTRAINT_END |
146 | }; | 126 | }; |
147 | 127 | ||
128 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { | ||
129 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), | ||
130 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), | ||
131 | EVENT_EXTRA_END | ||
132 | }; | ||
133 | |||
148 | static u64 intel_pmu_event_map(int hw_event) | 134 | static u64 intel_pmu_event_map(int hw_event) |
149 | { | 135 | { |
150 | return intel_perfmon_event_map[hw_event]; | 136 | return intel_perfmon_event_map[hw_event]; |
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids | |||
245 | [ C(RESULT_MISS) ] = -1, | 231 | [ C(RESULT_MISS) ] = -1, |
246 | }, | 232 | }, |
247 | }, | 233 | }, |
234 | [ C(NODE) ] = { | ||
235 | [ C(OP_READ) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = -1, | ||
237 | [ C(RESULT_MISS) ] = -1, | ||
238 | }, | ||
239 | [ C(OP_WRITE) ] = { | ||
240 | [ C(RESULT_ACCESS) ] = -1, | ||
241 | [ C(RESULT_MISS) ] = -1, | ||
242 | }, | ||
243 | [ C(OP_PREFETCH) ] = { | ||
244 | [ C(RESULT_ACCESS) ] = -1, | ||
245 | [ C(RESULT_MISS) ] = -1, | ||
246 | }, | ||
247 | }, | ||
248 | |||
248 | }; | 249 | }; |
249 | 250 | ||
250 | static __initconst const u64 westmere_hw_cache_event_ids | 251 | static __initconst const u64 westmere_hw_cache_event_ids |
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
346 | [ C(RESULT_MISS) ] = -1, | 347 | [ C(RESULT_MISS) ] = -1, |
347 | }, | 348 | }, |
348 | }, | 349 | }, |
350 | [ C(NODE) ] = { | ||
351 | [ C(OP_READ) ] = { | ||
352 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
353 | [ C(RESULT_MISS) ] = 0x01b7, | ||
354 | }, | ||
355 | [ C(OP_WRITE) ] = { | ||
356 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
357 | [ C(RESULT_MISS) ] = 0x01b7, | ||
358 | }, | ||
359 | [ C(OP_PREFETCH) ] = { | ||
360 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
361 | [ C(RESULT_MISS) ] = 0x01b7, | ||
362 | }, | ||
363 | }, | ||
349 | }; | 364 | }; |
350 | 365 | ||
351 | /* | 366 | /* |
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs | |||
398 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, | 413 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
399 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, | 414 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
400 | }, | 415 | }, |
401 | } | 416 | }, |
417 | [ C(NODE) ] = { | ||
418 | [ C(OP_READ) ] = { | ||
419 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, | ||
420 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, | ||
421 | }, | ||
422 | [ C(OP_WRITE) ] = { | ||
423 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, | ||
424 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, | ||
425 | }, | ||
426 | [ C(OP_PREFETCH) ] = { | ||
427 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, | ||
428 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, | ||
429 | }, | ||
430 | }, | ||
402 | }; | 431 | }; |
403 | 432 | ||
404 | static __initconst const u64 nehalem_hw_cache_event_ids | 433 | static __initconst const u64 nehalem_hw_cache_event_ids |
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids | |||
500 | [ C(RESULT_MISS) ] = -1, | 529 | [ C(RESULT_MISS) ] = -1, |
501 | }, | 530 | }, |
502 | }, | 531 | }, |
532 | [ C(NODE) ] = { | ||
533 | [ C(OP_READ) ] = { | ||
534 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
535 | [ C(RESULT_MISS) ] = 0x01b7, | ||
536 | }, | ||
537 | [ C(OP_WRITE) ] = { | ||
538 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
539 | [ C(RESULT_MISS) ] = 0x01b7, | ||
540 | }, | ||
541 | [ C(OP_PREFETCH) ] = { | ||
542 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
543 | [ C(RESULT_MISS) ] = 0x01b7, | ||
544 | }, | ||
545 | }, | ||
503 | }; | 546 | }; |
504 | 547 | ||
505 | static __initconst const u64 core2_hw_cache_event_ids | 548 | static __initconst const u64 core2_hw_cache_event_ids |
@@ -1003,7 +1046,7 @@ again: | |||
1003 | 1046 | ||
1004 | data.period = event->hw.last_period; | 1047 | data.period = event->hw.last_period; |
1005 | 1048 | ||
1006 | if (perf_event_overflow(event, 1, &data, regs)) | 1049 | if (perf_event_overflow(event, &data, regs)) |
1007 | x86_pmu_stop(event, 0); | 1050 | x86_pmu_stop(event, 0); |
1008 | } | 1051 | } |
1009 | 1052 | ||
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event) | |||
1037 | return NULL; | 1080 | return NULL; |
1038 | } | 1081 | } |
1039 | 1082 | ||
1083 | static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | ||
1084 | { | ||
1085 | if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) | ||
1086 | return false; | ||
1087 | |||
1088 | if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { | ||
1089 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1090 | event->hw.config |= 0x01bb; | ||
1091 | event->hw.extra_reg.idx = EXTRA_REG_RSP_1; | ||
1092 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; | ||
1093 | } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { | ||
1094 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; | ||
1095 | event->hw.config |= 0x01b7; | ||
1096 | event->hw.extra_reg.idx = EXTRA_REG_RSP_0; | ||
1097 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; | ||
1098 | } | ||
1099 | |||
1100 | if (event->hw.extra_reg.idx == orig_idx) | ||
1101 | return false; | ||
1102 | |||
1103 | return true; | ||
1104 | } | ||
1105 | |||
1106 | /* | ||
1107 | * manage allocation of shared extra msr for certain events | ||
1108 | * | ||
1109 | * sharing can be: | ||
1110 | * per-cpu: to be shared between the various events on a single PMU | ||
1111 | * per-core: per-cpu + shared by HT threads | ||
1112 | */ | ||
1040 | static struct event_constraint * | 1113 | static struct event_constraint * |
1041 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1114 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1115 | struct perf_event *event) | ||
1042 | { | 1116 | { |
1043 | struct hw_perf_event *hwc = &event->hw; | 1117 | struct event_constraint *c = &emptyconstraint; |
1044 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | 1118 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; |
1045 | struct event_constraint *c; | ||
1046 | struct intel_percore *pc; | ||
1047 | struct er_account *era; | 1119 | struct er_account *era; |
1048 | int i; | 1120 | unsigned long flags; |
1049 | int free_slot; | 1121 | int orig_idx = reg->idx; |
1050 | int found; | ||
1051 | 1122 | ||
1052 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | 1123 | /* already allocated shared msr */ |
1053 | return NULL; | 1124 | if (reg->alloc) |
1125 | return &unconstrained; | ||
1054 | 1126 | ||
1055 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | 1127 | again: |
1056 | if (e != c->code) | 1128 | era = &cpuc->shared_regs->regs[reg->idx]; |
1057 | continue; | 1129 | /* |
1130 | * we use spin_lock_irqsave() to avoid lockdep issues when | ||
1131 | * passing a fake cpuc | ||
1132 | */ | ||
1133 | raw_spin_lock_irqsave(&era->lock, flags); | ||
1134 | |||
1135 | if (!atomic_read(&era->ref) || era->config == reg->config) { | ||
1136 | |||
1137 | /* lock in msr value */ | ||
1138 | era->config = reg->config; | ||
1139 | era->reg = reg->reg; | ||
1140 | |||
1141 | /* one more user */ | ||
1142 | atomic_inc(&era->ref); | ||
1143 | |||
1144 | /* no need to reallocate during incremental event scheduling */ | ||
1145 | reg->alloc = 1; | ||
1058 | 1146 | ||
1059 | /* | 1147 | /* |
1060 | * Allocate resource per core. | 1148 | * All events using extra_reg are unconstrained. |
1149 | * Avoids calling x86_get_event_constraints() | ||
1150 | * | ||
1151 | * Must revisit if extra_reg controlling events | ||
1152 | * ever have constraints. Worst case we go through | ||
1153 | * the regular event constraint table. | ||
1061 | */ | 1154 | */ |
1062 | pc = cpuc->per_core; | 1155 | c = &unconstrained; |
1063 | if (!pc) | 1156 | } else if (intel_try_alt_er(event, orig_idx)) { |
1064 | break; | 1157 | raw_spin_unlock(&era->lock); |
1065 | c = &emptyconstraint; | 1158 | goto again; |
1066 | raw_spin_lock(&pc->lock); | ||
1067 | free_slot = -1; | ||
1068 | found = 0; | ||
1069 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1070 | era = &pc->regs[i]; | ||
1071 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
1072 | /* Allow sharing same config */ | ||
1073 | if (hwc->extra_config == era->extra_config) { | ||
1074 | era->ref++; | ||
1075 | cpuc->percore_used = 1; | ||
1076 | hwc->extra_alloc = 1; | ||
1077 | c = NULL; | ||
1078 | } | ||
1079 | /* else conflict */ | ||
1080 | found = 1; | ||
1081 | break; | ||
1082 | } else if (era->ref == 0 && free_slot == -1) | ||
1083 | free_slot = i; | ||
1084 | } | ||
1085 | if (!found && free_slot != -1) { | ||
1086 | era = &pc->regs[free_slot]; | ||
1087 | era->ref = 1; | ||
1088 | era->extra_reg = hwc->extra_reg; | ||
1089 | era->extra_config = hwc->extra_config; | ||
1090 | cpuc->percore_used = 1; | ||
1091 | hwc->extra_alloc = 1; | ||
1092 | c = NULL; | ||
1093 | } | ||
1094 | raw_spin_unlock(&pc->lock); | ||
1095 | return c; | ||
1096 | } | 1159 | } |
1160 | raw_spin_unlock_irqrestore(&era->lock, flags); | ||
1097 | 1161 | ||
1098 | return NULL; | 1162 | return c; |
1163 | } | ||
1164 | |||
1165 | static void | ||
1166 | __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, | ||
1167 | struct hw_perf_event_extra *reg) | ||
1168 | { | ||
1169 | struct er_account *era; | ||
1170 | |||
1171 | /* | ||
1172 | * only put constraint if extra reg was actually | ||
1173 | * allocated. Also takes care of events which do | ||
1174 | * not use an extra shared reg | ||
1175 | */ | ||
1176 | if (!reg->alloc) | ||
1177 | return; | ||
1178 | |||
1179 | era = &cpuc->shared_regs->regs[reg->idx]; | ||
1180 | |||
1181 | /* one fewer user */ | ||
1182 | atomic_dec(&era->ref); | ||
1183 | |||
1184 | /* allocate again next time */ | ||
1185 | reg->alloc = 0; | ||
1186 | } | ||
1187 | |||
1188 | static struct event_constraint * | ||
1189 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | ||
1190 | struct perf_event *event) | ||
1191 | { | ||
1192 | struct event_constraint *c = NULL; | ||
1193 | |||
1194 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | ||
1195 | c = __intel_shared_reg_get_constraints(cpuc, event); | ||
1196 | |||
1197 | return c; | ||
1099 | } | 1198 | } |
1100 | 1199 | ||
1101 | static struct event_constraint * | 1200 | static struct event_constraint * |
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
1111 | if (c) | 1210 | if (c) |
1112 | return c; | 1211 | return c; |
1113 | 1212 | ||
1114 | c = intel_percore_constraints(cpuc, event); | 1213 | c = intel_shared_regs_constraints(cpuc, event); |
1115 | if (c) | 1214 | if (c) |
1116 | return c; | 1215 | return c; |
1117 | 1216 | ||
1118 | return x86_get_event_constraints(cpuc, event); | 1217 | return x86_get_event_constraints(cpuc, event); |
1119 | } | 1218 | } |
1120 | 1219 | ||
1121 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1220 | static void |
1221 | intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | ||
1122 | struct perf_event *event) | 1222 | struct perf_event *event) |
1123 | { | 1223 | { |
1124 | struct extra_reg *er; | 1224 | struct hw_perf_event_extra *reg; |
1125 | struct intel_percore *pc; | ||
1126 | struct er_account *era; | ||
1127 | struct hw_perf_event *hwc = &event->hw; | ||
1128 | int i, allref; | ||
1129 | 1225 | ||
1130 | if (!cpuc->percore_used) | 1226 | reg = &event->hw.extra_reg; |
1131 | return; | 1227 | if (reg->idx != EXTRA_REG_NONE) |
1132 | 1228 | __intel_shared_reg_put_constraints(cpuc, reg); | |
1133 | for (er = x86_pmu.extra_regs; er->msr; er++) { | 1229 | } |
1134 | if (er->event != (hwc->config & er->config_mask)) | ||
1135 | continue; | ||
1136 | 1230 | ||
1137 | pc = cpuc->per_core; | 1231 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
1138 | raw_spin_lock(&pc->lock); | 1232 | struct perf_event *event) |
1139 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | 1233 | { |
1140 | era = &pc->regs[i]; | 1234 | intel_put_shared_regs_event_constraints(cpuc, event); |
1141 | if (era->ref > 0 && | ||
1142 | era->extra_config == hwc->extra_config && | ||
1143 | era->extra_reg == er->msr) { | ||
1144 | era->ref--; | ||
1145 | hwc->extra_alloc = 0; | ||
1146 | break; | ||
1147 | } | ||
1148 | } | ||
1149 | allref = 0; | ||
1150 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1151 | allref += pc->regs[i].ref; | ||
1152 | if (allref == 0) | ||
1153 | cpuc->percore_used = 0; | ||
1154 | raw_spin_unlock(&pc->lock); | ||
1155 | break; | ||
1156 | } | ||
1157 | } | 1235 | } |
1158 | 1236 | ||
1159 | static int intel_pmu_hw_config(struct perf_event *event) | 1237 | static int intel_pmu_hw_config(struct perf_event *event) |
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = { | |||
1231 | .event_constraints = intel_core_event_constraints, | 1309 | .event_constraints = intel_core_event_constraints, |
1232 | }; | 1310 | }; |
1233 | 1311 | ||
1312 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
1313 | { | ||
1314 | struct intel_shared_regs *regs; | ||
1315 | int i; | ||
1316 | |||
1317 | regs = kzalloc_node(sizeof(struct intel_shared_regs), | ||
1318 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1319 | if (regs) { | ||
1320 | /* | ||
1321 | * initialize the locks to keep lockdep happy | ||
1322 | */ | ||
1323 | for (i = 0; i < EXTRA_REG_MAX; i++) | ||
1324 | raw_spin_lock_init(®s->regs[i].lock); | ||
1325 | |||
1326 | regs->core_id = -1; | ||
1327 | } | ||
1328 | return regs; | ||
1329 | } | ||
1330 | |||
1234 | static int intel_pmu_cpu_prepare(int cpu) | 1331 | static int intel_pmu_cpu_prepare(int cpu) |
1235 | { | 1332 | { |
1236 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1333 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1237 | 1334 | ||
1238 | if (!cpu_has_ht_siblings()) | 1335 | if (!x86_pmu.extra_regs) |
1239 | return NOTIFY_OK; | 1336 | return NOTIFY_OK; |
1240 | 1337 | ||
1241 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | 1338 | cpuc->shared_regs = allocate_shared_regs(cpu); |
1242 | GFP_KERNEL, cpu_to_node(cpu)); | 1339 | if (!cpuc->shared_regs) |
1243 | if (!cpuc->per_core) | ||
1244 | return NOTIFY_BAD; | 1340 | return NOTIFY_BAD; |
1245 | 1341 | ||
1246 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1247 | cpuc->per_core->core_id = -1; | ||
1248 | return NOTIFY_OK; | 1342 | return NOTIFY_OK; |
1249 | } | 1343 | } |
1250 | 1344 | ||
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1260 | */ | 1354 | */ |
1261 | intel_pmu_lbr_reset(); | 1355 | intel_pmu_lbr_reset(); |
1262 | 1356 | ||
1263 | if (!cpu_has_ht_siblings()) | 1357 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) |
1264 | return; | 1358 | return; |
1265 | 1359 | ||
1266 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1360 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1267 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | 1361 | struct intel_shared_regs *pc; |
1268 | 1362 | ||
1363 | pc = per_cpu(cpu_hw_events, i).shared_regs; | ||
1269 | if (pc && pc->core_id == core_id) { | 1364 | if (pc && pc->core_id == core_id) { |
1270 | kfree(cpuc->per_core); | 1365 | kfree(cpuc->shared_regs); |
1271 | cpuc->per_core = pc; | 1366 | cpuc->shared_regs = pc; |
1272 | break; | 1367 | break; |
1273 | } | 1368 | } |
1274 | } | 1369 | } |
1275 | 1370 | ||
1276 | cpuc->per_core->core_id = core_id; | 1371 | cpuc->shared_regs->core_id = core_id; |
1277 | cpuc->per_core->refcnt++; | 1372 | cpuc->shared_regs->refcnt++; |
1278 | } | 1373 | } |
1279 | 1374 | ||
1280 | static void intel_pmu_cpu_dying(int cpu) | 1375 | static void intel_pmu_cpu_dying(int cpu) |
1281 | { | 1376 | { |
1282 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1377 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1283 | struct intel_percore *pc = cpuc->per_core; | 1378 | struct intel_shared_regs *pc; |
1284 | 1379 | ||
1380 | pc = cpuc->shared_regs; | ||
1285 | if (pc) { | 1381 | if (pc) { |
1286 | if (pc->core_id == -1 || --pc->refcnt == 0) | 1382 | if (pc->core_id == -1 || --pc->refcnt == 0) |
1287 | kfree(pc); | 1383 | kfree(pc); |
1288 | cpuc->per_core = NULL; | 1384 | cpuc->shared_regs = NULL; |
1289 | } | 1385 | } |
1290 | 1386 | ||
1291 | fini_debug_store_on_cpu(cpu); | 1387 | fini_debug_store_on_cpu(cpu); |
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void) | |||
1436 | 1532 | ||
1437 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1533 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1438 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | 1534 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
1439 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1440 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1535 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1441 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1536 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1442 | 1537 | ||
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void) | |||
1481 | intel_pmu_lbr_init_nhm(); | 1576 | intel_pmu_lbr_init_nhm(); |
1482 | 1577 | ||
1483 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1578 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1484 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1485 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1579 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1486 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | 1580 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
1487 | x86_pmu.extra_regs = intel_westmere_extra_regs; | 1581 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
1582 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
1488 | 1583 | ||
1489 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1584 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1490 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1585 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; |
@@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void) | |||
1502 | 1597 | ||
1503 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1598 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1504 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | 1599 | x86_pmu.pebs_constraints = intel_snb_pebs_events; |
1600 | x86_pmu.extra_regs = intel_snb_extra_regs; | ||
1601 | /* all extra regs are per-cpu when HT is on */ | ||
1602 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | ||
1603 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | ||
1505 | 1604 | ||
1506 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | 1605 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
1507 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1606 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; |
@@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void) | |||
1512 | break; | 1611 | break; |
1513 | 1612 | ||
1514 | default: | 1613 | default: |
1515 | /* | 1614 | switch (x86_pmu.version) { |
1516 | * default constraints for v2 and up | 1615 | case 1: |
1517 | */ | 1616 | x86_pmu.event_constraints = intel_v1_event_constraints; |
1518 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1617 | pr_cont("generic architected perfmon v1, "); |
1519 | pr_cont("generic architected perfmon, "); | 1618 | break; |
1619 | default: | ||
1620 | /* | ||
1621 | * default constraints for v2 and up | ||
1622 | */ | ||
1623 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
1624 | pr_cont("generic architected perfmon, "); | ||
1625 | break; | ||
1626 | } | ||
1520 | } | 1627 | } |
1521 | return 0; | 1628 | return 0; |
1522 | } | 1629 | } |
@@ -1528,4 +1635,8 @@ static int intel_pmu_init(void) | |||
1528 | return 0; | 1635 | return 0; |
1529 | } | 1636 | } |
1530 | 1637 | ||
1638 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
1639 | { | ||
1640 | return NULL; | ||
1641 | } | ||
1531 | #endif /* CONFIG_CPU_SUP_INTEL */ | 1642 | #endif /* CONFIG_CPU_SUP_INTEL */ |
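A note for readers of the shared-register rework above: each extra MSR now has a reference-counted slot, and an event may schedule only if the slot is unused or already programmed with the same value; otherwise it gets the empty constraint. The kernel does this under a raw spinlock with atomics. The stand-alone C below is only a simplified model of that accounting; the names and the lack of locking are illustrative, not the kernel API.

/* Simplified model of the shared extra-MSR accounting above.  The kernel
 * uses struct er_account with a raw spinlock and atomic_t; this toy keeps
 * just the refcount-or-conflict decision. */
#include <stdbool.h>
#include <stdio.h>

struct er_slot {
	int ref;                      /* events currently using this MSR */
	unsigned long long config;    /* value programmed into the MSR */
};

static bool shared_reg_get(struct er_slot *era, unsigned long long config)
{
	if (era->ref == 0 || era->config == config) {
		era->config = config; /* lock in the MSR value */
		era->ref++;           /* one more user */
		return true;          /* event remains unconstrained */
	}
	return false;                 /* conflicting value: cannot schedule */
}

static void shared_reg_put(struct er_slot *era)
{
	era->ref--;                   /* one fewer user; slot reusable at 0 */
}

int main(void)
{
	struct er_slot rsp0 = { 0, 0 };

	printf("%d\n", shared_reg_get(&rsp0, 0x01b7)); /* 1: first user */
	printf("%d\n", shared_reg_get(&rsp0, 0x01b7)); /* 1: same config shares */
	printf("%d\n", shared_reg_get(&rsp0, 0x20ff)); /* 0: conflicting config */
	shared_reg_put(&rsp0);
	shared_reg_put(&rsp0);
	return 0;
}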
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index bab491b8ee25..1b1ef3addcfd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void) | |||
340 | */ | 340 | */ |
341 | perf_prepare_sample(&header, &data, event, ®s); | 341 | perf_prepare_sample(&header, &data, event, ®s); |
342 | 342 | ||
343 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | 343 | if (perf_output_begin(&handle, event, header.size * (top - at))) |
344 | return 1; | 344 | return 1; |
345 | 345 | ||
346 | for (; at < top; at++) { | 346 | for (; at < top; at++) { |
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
616 | else | 616 | else |
617 | regs.flags &= ~PERF_EFLAGS_EXACT; | 617 | regs.flags &= ~PERF_EFLAGS_EXACT; |
618 | 618 | ||
619 | if (perf_event_overflow(event, 1, &data, ®s)) | 619 | if (perf_event_overflow(event, &data, ®s)) |
620 | x86_pmu_stop(event, 0); | 620 | x86_pmu_stop(event, 0); |
621 | } | 621 | } |
622 | 622 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ead584fb6a7d..7809d2bcb209 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids | |||
554 | [ C(RESULT_MISS) ] = -1, | 554 | [ C(RESULT_MISS) ] = -1, |
555 | }, | 555 | }, |
556 | }, | 556 | }, |
557 | [ C(NODE) ] = { | ||
558 | [ C(OP_READ) ] = { | ||
559 | [ C(RESULT_ACCESS) ] = -1, | ||
560 | [ C(RESULT_MISS) ] = -1, | ||
561 | }, | ||
562 | [ C(OP_WRITE) ] = { | ||
563 | [ C(RESULT_ACCESS) ] = -1, | ||
564 | [ C(RESULT_MISS) ] = -1, | ||
565 | }, | ||
566 | [ C(OP_PREFETCH) ] = { | ||
567 | [ C(RESULT_ACCESS) ] = -1, | ||
568 | [ C(RESULT_MISS) ] = -1, | ||
569 | }, | ||
570 | }, | ||
557 | }; | 571 | }; |
558 | 572 | ||
573 | /* | ||
574 | * Because Netburst is quite restricted in how many | ||
575 | * identical events may run simultaneously, we introduce event aliases, | ||
576 | * i.e. different events which have the same functionality but | ||
577 | * use non-intersecting resources (ESCR/CCCR/counter registers). | ||
578 | * | ||
579 | * This allows us to relax the restrictions a bit and run two or more | ||
580 | * identical events together. | ||
581 | * | ||
582 | * Never set any custom internal bits such as P4_CONFIG_HT, | ||
583 | * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are | ||
584 | * either up to date automatically or not applicable at all. | ||
585 | */ | ||
586 | struct p4_event_alias { | ||
587 | u64 original; | ||
588 | u64 alternative; | ||
589 | } p4_event_aliases[] = { | ||
590 | { | ||
591 | /* | ||
592 | * Non-halted cycles can be substituted with non-sleeping cycles (see | ||
593 | * Intel SDM Vol3b for details). We need this alias to be able | ||
594 | * to run nmi-watchdog and 'perf top' (or any other user space tool | ||
595 | * which is interested in running PERF_COUNT_HW_CPU_CYCLES) | ||
596 | * simultaneously. | ||
597 | */ | ||
598 | .original = | ||
599 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
600 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
601 | .alternative = | ||
602 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | | ||
603 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| | ||
604 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| | ||
605 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| | ||
606 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| | ||
607 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | | ||
608 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | | ||
609 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | | ||
610 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| | ||
611 | p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | | ||
612 | P4_CCCR_COMPARE), | ||
613 | }, | ||
614 | }; | ||
615 | |||
616 | static u64 p4_get_alias_event(u64 config) | ||
617 | { | ||
618 | u64 config_match; | ||
619 | int i; | ||
620 | |||
621 | /* | ||
622 | * Only events with the special mark are allowed; | ||
623 | * this ensures the config didn't come in as a | ||
624 | * malformed RAW event. | ||
625 | */ | ||
626 | if (!(config & P4_CONFIG_ALIASABLE)) | ||
627 | return 0; | ||
628 | |||
629 | config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; | ||
630 | |||
631 | for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { | ||
632 | if (config_match == p4_event_aliases[i].original) { | ||
633 | config_match = p4_event_aliases[i].alternative; | ||
634 | break; | ||
635 | } else if (config_match == p4_event_aliases[i].alternative) { | ||
636 | config_match = p4_event_aliases[i].original; | ||
637 | break; | ||
638 | } | ||
639 | } | ||
640 | |||
641 | if (i >= ARRAY_SIZE(p4_event_aliases)) | ||
642 | return 0; | ||
643 | |||
644 | return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); | ||
645 | } | ||
646 | |||
559 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | 647 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { |
560 | /* non-halted CPU clocks */ | 648 | /* non-halted CPU clocks */ |
561 | [PERF_COUNT_HW_CPU_CYCLES] = | 649 | [PERF_COUNT_HW_CPU_CYCLES] = |
562 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | 650 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | |
563 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | 651 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | |
652 | P4_CONFIG_ALIASABLE, | ||
564 | 653 | ||
565 | /* | 654 | /* |
566 | * retired instructions | 655 | * retired instructions |
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
945 | 1034 | ||
946 | if (!x86_perf_event_set_period(event)) | 1035 | if (!x86_perf_event_set_period(event)) |
947 | continue; | 1036 | continue; |
948 | if (perf_event_overflow(event, 1, &data, regs)) | 1037 | if (perf_event_overflow(event, &data, regs)) |
949 | x86_pmu_stop(event, 0); | 1038 | x86_pmu_stop(event, 0); |
950 | } | 1039 | } |
951 | 1040 | ||
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1120 | struct p4_event_bind *bind; | 1209 | struct p4_event_bind *bind; |
1121 | unsigned int i, thread, num; | 1210 | unsigned int i, thread, num; |
1122 | int cntr_idx, escr_idx; | 1211 | int cntr_idx, escr_idx; |
1212 | u64 config_alias; | ||
1213 | int pass; | ||
1123 | 1214 | ||
1124 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 1215 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1125 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | 1216 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); |
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1128 | 1219 | ||
1129 | hwc = &cpuc->event_list[i]->hw; | 1220 | hwc = &cpuc->event_list[i]->hw; |
1130 | thread = p4_ht_thread(cpu); | 1221 | thread = p4_ht_thread(cpu); |
1222 | pass = 0; | ||
1223 | |||
1224 | again: | ||
1225 | /* | ||
1226 | * It's possible to loop between the original and | ||
1227 | * alternative events forever if both are already | ||
1228 | * scheduled, hence the bound on retries. | ||
1229 | */ | ||
1230 | if (pass > 2) | ||
1231 | goto done; | ||
1232 | |||
1131 | bind = p4_config_get_bind(hwc->config); | 1233 | bind = p4_config_get_bind(hwc->config); |
1132 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | 1234 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); |
1133 | if (unlikely(escr_idx == -1)) | 1235 | if (unlikely(escr_idx == -1)) |
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign | |||
1141 | } | 1243 | } |
1142 | 1244 | ||
1143 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | 1245 | cntr_idx = p4_next_cntr(thread, used_mask, bind); |
1144 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | 1246 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { |
1145 | goto done; | 1247 | /* |
1248 | * Check whether an event alias is still available. | ||
1249 | */ | ||
1250 | config_alias = p4_get_alias_event(hwc->config); | ||
1251 | if (!config_alias) | ||
1252 | goto done; | ||
1253 | hwc->config = config_alias; | ||
1254 | pass++; | ||
1255 | goto again; | ||
1256 | } | ||
1146 | 1257 | ||
1147 | p4_pmu_swap_config_ts(hwc, cpu); | 1258 | p4_pmu_swap_config_ts(hwc, cpu); |
1148 | if (assign) | 1259 | if (assign) |
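The Netburst alias table added above is looked up symmetrically: an aliasable config can be swapped to its alternative encoding and back, with the scheduler's pass counter bounding the retries. The snippet below is a self-contained approximation of that lookup; the marker bit, mask and config values are invented placeholders rather than the real P4 encodings, and the immutable-bits handling is omitted.

/* Toy version of the alias lookup: swap between an original and an
 * alternative encoding of the same logical event. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CFG_ALIASABLE  (1ULL << 63)          /* placeholder marker bit */
#define CFG_EVENT_MASK 0x00ffffffffffffffULL /* placeholder event mask */

struct event_alias {
	uint64_t original;
	uint64_t alternative;
};

static const struct event_alias aliases[] = {
	{ .original = 0x1111, .alternative = 0x2222 },
};

static uint64_t get_alias_event(uint64_t config)
{
	uint64_t match;
	size_t i;

	if (!(config & CFG_ALIASABLE))   /* only specially marked events */
		return 0;

	match = config & CFG_EVENT_MASK;
	for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
		if (match == aliases[i].original)
			return aliases[i].alternative | CFG_ALIASABLE;
		if (match == aliases[i].alternative)
			return aliases[i].original | CFG_ALIASABLE;
	}
	return 0;                        /* no alias known for this event */
}

int main(void)
{
	uint64_t cfg = get_alias_event(0x1111 | CFG_ALIASABLE);

	printf("%#llx\n", (unsigned long long)cfg); /* alternative encoding */
	return 0;
}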
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 9aeb78a23de4..a621f3427685 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
@@ -134,6 +134,24 @@ static int __init add_bus_probe(void) | |||
134 | module_init(add_bus_probe); | 134 | module_init(add_bus_probe); |
135 | 135 | ||
136 | #ifdef CONFIG_PCI | 136 | #ifdef CONFIG_PCI |
137 | struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) | ||
138 | { | ||
139 | struct device_node *np; | ||
140 | |||
141 | for_each_node_by_type(np, "pci") { | ||
142 | const void *prop; | ||
143 | unsigned int bus_min; | ||
144 | |||
145 | prop = of_get_property(np, "bus-range", NULL); | ||
146 | if (!prop) | ||
147 | continue; | ||
148 | bus_min = be32_to_cpup(prop); | ||
149 | if (bus->number == bus_min) | ||
150 | return np; | ||
151 | } | ||
152 | return NULL; | ||
153 | } | ||
154 | |||
137 | static int x86_of_pci_irq_enable(struct pci_dev *dev) | 155 | static int x86_of_pci_irq_enable(struct pci_dev *dev) |
138 | { | 156 | { |
139 | struct of_irq oirq; | 157 | struct of_irq oirq; |
@@ -165,50 +183,8 @@ static void x86_of_pci_irq_disable(struct pci_dev *dev) | |||
165 | 183 | ||
166 | void __cpuinit x86_of_pci_init(void) | 184 | void __cpuinit x86_of_pci_init(void) |
167 | { | 185 | { |
168 | struct device_node *np; | ||
169 | |||
170 | pcibios_enable_irq = x86_of_pci_irq_enable; | 186 | pcibios_enable_irq = x86_of_pci_irq_enable; |
171 | pcibios_disable_irq = x86_of_pci_irq_disable; | 187 | pcibios_disable_irq = x86_of_pci_irq_disable; |
172 | |||
173 | for_each_node_by_type(np, "pci") { | ||
174 | const void *prop; | ||
175 | struct pci_bus *bus; | ||
176 | unsigned int bus_min; | ||
177 | struct device_node *child; | ||
178 | |||
179 | prop = of_get_property(np, "bus-range", NULL); | ||
180 | if (!prop) | ||
181 | continue; | ||
182 | bus_min = be32_to_cpup(prop); | ||
183 | |||
184 | bus = pci_find_bus(0, bus_min); | ||
185 | if (!bus) { | ||
186 | printk(KERN_ERR "Can't find a node for bus %s.\n", | ||
187 | np->full_name); | ||
188 | continue; | ||
189 | } | ||
190 | |||
191 | if (bus->self) | ||
192 | bus->self->dev.of_node = np; | ||
193 | else | ||
194 | bus->dev.of_node = np; | ||
195 | |||
196 | for_each_child_of_node(np, child) { | ||
197 | struct pci_dev *dev; | ||
198 | u32 devfn; | ||
199 | |||
200 | prop = of_get_property(child, "reg", NULL); | ||
201 | if (!prop) | ||
202 | continue; | ||
203 | |||
204 | devfn = (be32_to_cpup(prop) >> 8) & 0xff; | ||
205 | dev = pci_get_slot(bus, devfn); | ||
206 | if (!dev) | ||
207 | continue; | ||
208 | dev->dev.of_node = child; | ||
209 | pci_dev_put(dev); | ||
210 | } | ||
211 | } | ||
212 | } | 188 | } |
213 | #endif | 189 | #endif |
214 | 190 | ||
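The pcibios_get_phb_of_node() helper added above matches a PCI root bus to its device-tree node purely by the first cell of the node's "bus-range" property; the per-device of_node wiring that x86_of_pci_init() used to do by hand is now left to the generic PCI/OF code. A rough user-space illustration of that matching rule follows; the node table and names are invented for the example.

/* Illustration of matching a root bus number against the first cell of
 * each "pci" node's bus-range property (the kernel walks real DT nodes
 * with for_each_node_by_type() and be32_to_cpup()). */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct dt_pci_node {
	const char *name;
	uint32_t    bus_range[2];     /* { first bus, last bus } */
};

static const struct dt_pci_node nodes[] = {
	{ "/pci@0", { 0x00, 0x3f } },
	{ "/pci@1", { 0x40, 0x7f } },
};

static const struct dt_pci_node *phb_of_node(uint32_t bus_number)
{
	size_t i;

	for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
		if (bus_number == nodes[i].bus_range[0])
			return &nodes[i];
	return NULL;                  /* no matching host bridge node */
}

int main(void)
{
	const struct dt_pci_node *np = phb_of_node(0x40);

	printf("%s\n", np ? np->name : "(none)");
	return 0;
}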
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index e71c98d3c0d2..19853ad8afc5 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -105,34 +105,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack, | |||
105 | } | 105 | } |
106 | 106 | ||
107 | /* | 107 | /* |
108 | * We are returning from the irq stack and go to the previous one. | ||
109 | * If the previous stack is also in the irq stack, then bp in the first | ||
110 | * frame of the irq stack points to the previous, interrupted one. | ||
111 | * Otherwise we have another level of indirection: We first save | ||
112 | * the bp of the previous stack, then we switch the stack to the irq one | ||
113 | * and save a new bp that links to the previous one. | ||
114 | * (See save_args()) | ||
115 | */ | ||
116 | static inline unsigned long | ||
117 | fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | ||
118 | unsigned long *irq_stack, unsigned long *irq_stack_end) | ||
119 | { | ||
120 | #ifdef CONFIG_FRAME_POINTER | ||
121 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
122 | unsigned long next; | ||
123 | |||
124 | if (!in_irq_stack(stack, irq_stack, irq_stack_end)) { | ||
125 | if (!probe_kernel_address(&frame->next_frame, next)) | ||
126 | return next; | ||
127 | else | ||
128 | WARN_ONCE(1, "Perf: bad frame pointer = %p in " | ||
129 | "callchain\n", &frame->next_frame); | ||
130 | } | ||
131 | #endif | ||
132 | return bp; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * x86-64 can have up to three kernel stacks: | 108 | * x86-64 can have up to three kernel stacks: |
137 | * process stack | 109 | * process stack |
138 | * interrupt stack | 110 | * interrupt stack |
@@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
155 | task = current; | 127 | task = current; |
156 | 128 | ||
157 | if (!stack) { | 129 | if (!stack) { |
158 | stack = &dummy; | 130 | if (regs) |
159 | if (task && task != current) | 131 | stack = (unsigned long *)regs->sp; |
132 | else if (task && task != current) | ||
160 | stack = (unsigned long *)task->thread.sp; | 133 | stack = (unsigned long *)task->thread.sp; |
134 | else | ||
135 | stack = &dummy; | ||
161 | } | 136 | } |
162 | 137 | ||
163 | if (!bp) | 138 | if (!bp) |
@@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
205 | * pointer (index -1 to end) in the IRQ stack: | 180 | * pointer (index -1 to end) in the IRQ stack: |
206 | */ | 181 | */ |
207 | stack = (unsigned long *) (irq_stack_end[-1]); | 182 | stack = (unsigned long *) (irq_stack_end[-1]); |
208 | bp = fixup_bp_irq_link(bp, stack, irq_stack, | ||
209 | irq_stack_end); | ||
210 | irq_stack_end = NULL; | 183 | irq_stack_end = NULL; |
211 | ops->stack(data, "EOI"); | 184 | ops->stack(data, "EOI"); |
212 | continue; | 185 | continue; |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0c989e..e13329d800c8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -9,6 +9,8 @@ | |||
9 | /* | 9 | /* |
10 | * entry.S contains the system-call and fault low-level handling routines. | 10 | * entry.S contains the system-call and fault low-level handling routines. |
11 | * | 11 | * |
12 | * Some of this is documented in Documentation/x86/entry_64.txt | ||
13 | * | ||
12 | * NOTE: This code handles signal-recognition, which happens every time | 14 | * NOTE: This code handles signal-recognition, which happens every time |
13 | * after an interrupt and after each system call. | 15 | * after an interrupt and after each system call. |
14 | * | 16 | * |
@@ -297,27 +299,26 @@ ENDPROC(native_usergs_sysret64) | |||
297 | .endm | 299 | .endm |
298 | 300 | ||
299 | /* save partial stack frame */ | 301 | /* save partial stack frame */ |
300 | .pushsection .kprobes.text, "ax" | 302 | .macro SAVE_ARGS_IRQ |
301 | ENTRY(save_args) | ||
302 | XCPT_FRAME | ||
303 | cld | 303 | cld |
304 | /* | 304 | /* start from rbp in pt_regs and jump over */ |
305 | * start from rbp in pt_regs and jump over | 305 | movq_cfi rdi, RDI-RBP |
306 | * return address. | 306 | movq_cfi rsi, RSI-RBP |
307 | */ | 307 | movq_cfi rdx, RDX-RBP |
308 | movq_cfi rdi, RDI+8-RBP | 308 | movq_cfi rcx, RCX-RBP |
309 | movq_cfi rsi, RSI+8-RBP | 309 | movq_cfi rax, RAX-RBP |
310 | movq_cfi rdx, RDX+8-RBP | 310 | movq_cfi r8, R8-RBP |
311 | movq_cfi rcx, RCX+8-RBP | 311 | movq_cfi r9, R9-RBP |
312 | movq_cfi rax, RAX+8-RBP | 312 | movq_cfi r10, R10-RBP |
313 | movq_cfi r8, R8+8-RBP | 313 | movq_cfi r11, R11-RBP |
314 | movq_cfi r9, R9+8-RBP | 314 | |
315 | movq_cfi r10, R10+8-RBP | 315 | /* Save rbp so that we can unwind from get_irq_regs() */ |
316 | movq_cfi r11, R11+8-RBP | 316 | movq_cfi rbp, 0 |
317 | 317 | ||
318 | leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ | 318 | /* Save previous stack value */ |
319 | movq_cfi rbp, 8 /* push %rbp */ | 319 | movq %rsp, %rsi |
320 | leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ | 320 | |
321 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | ||
321 | testl $3, CS(%rdi) | 322 | testl $3, CS(%rdi) |
322 | je 1f | 323 | je 1f |
323 | SWAPGS | 324 | SWAPGS |
@@ -329,19 +330,14 @@ ENTRY(save_args) | |||
329 | */ | 330 | */ |
330 | 1: incl PER_CPU_VAR(irq_count) | 331 | 1: incl PER_CPU_VAR(irq_count) |
331 | jne 2f | 332 | jne 2f |
332 | popq_cfi %rax /* move return address... */ | ||
333 | mov PER_CPU_VAR(irq_stack_ptr),%rsp | 333 | mov PER_CPU_VAR(irq_stack_ptr),%rsp |
334 | EMPTY_FRAME 0 | 334 | EMPTY_FRAME 0 |
335 | pushq_cfi %rbp /* backlink for unwinder */ | 335 | |
336 | pushq_cfi %rax /* ... to the new stack */ | 336 | 2: /* Store previous stack value */ |
337 | /* | 337 | pushq %rsi |
338 | * We entered an interrupt context - irqs are off: | 338 | /* We entered an interrupt context - irqs are off: */ |
339 | */ | 339 | TRACE_IRQS_OFF |
340 | 2: TRACE_IRQS_OFF | 340 | .endm |
341 | ret | ||
342 | CFI_ENDPROC | ||
343 | END(save_args) | ||
344 | .popsection | ||
345 | 341 | ||
346 | ENTRY(save_rest) | 342 | ENTRY(save_rest) |
347 | PARTIAL_FRAME 1 REST_SKIP+8 | 343 | PARTIAL_FRAME 1 REST_SKIP+8 |
@@ -473,7 +469,7 @@ ENTRY(system_call_after_swapgs) | |||
473 | * and short: | 469 | * and short: |
474 | */ | 470 | */ |
475 | ENABLE_INTERRUPTS(CLBR_NONE) | 471 | ENABLE_INTERRUPTS(CLBR_NONE) |
476 | SAVE_ARGS 8,1 | 472 | SAVE_ARGS 8,0 |
477 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 473 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
478 | movq %rcx,RIP-ARGOFFSET(%rsp) | 474 | movq %rcx,RIP-ARGOFFSET(%rsp) |
479 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 475 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
@@ -508,7 +504,7 @@ sysret_check: | |||
508 | TRACE_IRQS_ON | 504 | TRACE_IRQS_ON |
509 | movq RIP-ARGOFFSET(%rsp),%rcx | 505 | movq RIP-ARGOFFSET(%rsp),%rcx |
510 | CFI_REGISTER rip,rcx | 506 | CFI_REGISTER rip,rcx |
511 | RESTORE_ARGS 0,-ARG_SKIP,1 | 507 | RESTORE_ARGS 1,-ARG_SKIP,0 |
512 | /*CFI_REGISTER rflags,r11*/ | 508 | /*CFI_REGISTER rflags,r11*/ |
513 | movq PER_CPU_VAR(old_rsp), %rsp | 509 | movq PER_CPU_VAR(old_rsp), %rsp |
514 | USERGS_SYSRET64 | 510 | USERGS_SYSRET64 |
@@ -791,7 +787,7 @@ END(interrupt) | |||
791 | /* reserve pt_regs for scratch regs and rbp */ | 787 | /* reserve pt_regs for scratch regs and rbp */ |
792 | subq $ORIG_RAX-RBP, %rsp | 788 | subq $ORIG_RAX-RBP, %rsp |
793 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 789 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
794 | call save_args | 790 | SAVE_ARGS_IRQ |
795 | PARTIAL_FRAME 0 | 791 | PARTIAL_FRAME 0 |
796 | call \func | 792 | call \func |
797 | .endm | 793 | .endm |
@@ -814,15 +810,14 @@ ret_from_intr: | |||
814 | DISABLE_INTERRUPTS(CLBR_NONE) | 810 | DISABLE_INTERRUPTS(CLBR_NONE) |
815 | TRACE_IRQS_OFF | 811 | TRACE_IRQS_OFF |
816 | decl PER_CPU_VAR(irq_count) | 812 | decl PER_CPU_VAR(irq_count) |
817 | leaveq | ||
818 | 813 | ||
819 | CFI_RESTORE rbp | 814 | /* Restore saved previous stack */ |
815 | popq %rsi | ||
816 | leaq 16(%rsi), %rsp | ||
817 | |||
820 | CFI_DEF_CFA_REGISTER rsp | 818 | CFI_DEF_CFA_REGISTER rsp |
821 | CFI_ADJUST_CFA_OFFSET -8 | 819 | CFI_ADJUST_CFA_OFFSET -16 |
822 | 820 | ||
823 | /* we did not save rbx, restore only from ARGOFFSET */ | ||
824 | addq $8, %rsp | ||
825 | CFI_ADJUST_CFA_OFFSET -8 | ||
826 | exit_intr: | 821 | exit_intr: |
827 | GET_THREAD_INFO(%rcx) | 822 | GET_THREAD_INFO(%rcx) |
828 | testl $3,CS-ARGOFFSET(%rsp) | 823 | testl $3,CS-ARGOFFSET(%rsp) |
@@ -858,7 +853,7 @@ retint_restore_args: /* return to kernel space */ | |||
858 | */ | 853 | */ |
859 | TRACE_IRQS_IRETQ | 854 | TRACE_IRQS_IRETQ |
860 | restore_args: | 855 | restore_args: |
861 | RESTORE_ARGS 0,8,0 | 856 | RESTORE_ARGS 1,8,1 |
862 | 857 | ||
863 | irq_return: | 858 | irq_return: |
864 | INTERRUPT_RETURN | 859 | INTERRUPT_RETURN |
@@ -991,11 +986,6 @@ apicinterrupt THRESHOLD_APIC_VECTOR \ | |||
991 | apicinterrupt THERMAL_APIC_VECTOR \ | 986 | apicinterrupt THERMAL_APIC_VECTOR \ |
992 | thermal_interrupt smp_thermal_interrupt | 987 | thermal_interrupt smp_thermal_interrupt |
993 | 988 | ||
994 | #ifdef CONFIG_X86_MCE | ||
995 | apicinterrupt MCE_SELF_VECTOR \ | ||
996 | mce_self_interrupt smp_mce_self_interrupt | ||
997 | #endif | ||
998 | |||
999 | #ifdef CONFIG_SMP | 989 | #ifdef CONFIG_SMP |
1000 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | 990 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ |
1001 | call_function_single_interrupt smp_call_function_single_interrupt | 991 | call_function_single_interrupt smp_call_function_single_interrupt |
@@ -1121,6 +1111,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
1121 | zeroentry coprocessor_error do_coprocessor_error | 1111 | zeroentry coprocessor_error do_coprocessor_error |
1122 | errorentry alignment_check do_alignment_check | 1112 | errorentry alignment_check do_alignment_check |
1123 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1113 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1114 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
1115 | |||
1124 | 1116 | ||
1125 | /* Reload gs selector with exception handling */ | 1117 | /* Reload gs selector with exception handling */ |
1126 | /* edi: new selector */ | 1118 | /* edi: new selector */ |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 6781765b3a0d..4aecc54236a9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sysdev.h> | 4 | #include <linux/sysdev.h> |
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/i8253.h> | ||
7 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
8 | #include <linux/hpet.h> | 9 | #include <linux/hpet.h> |
9 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -12,8 +13,8 @@ | |||
12 | #include <linux/io.h> | 13 | #include <linux/io.h> |
13 | 14 | ||
14 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
15 | #include <asm/i8253.h> | ||
16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
17 | #include <asm/time.h> | ||
17 | 18 | ||
18 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 19 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
19 | 20 | ||
@@ -71,7 +72,7 @@ static inline void hpet_set_mapping(void) | |||
71 | { | 72 | { |
72 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | 73 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); |
73 | #ifdef CONFIG_X86_64 | 74 | #ifdef CONFIG_X86_64 |
74 | __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); | 75 | __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE); |
75 | #endif | 76 | #endif |
76 | } | 77 | } |
77 | 78 | ||
@@ -738,13 +739,6 @@ static cycle_t read_hpet(struct clocksource *cs) | |||
738 | return (cycle_t)hpet_readl(HPET_COUNTER); | 739 | return (cycle_t)hpet_readl(HPET_COUNTER); |
739 | } | 740 | } |
740 | 741 | ||
741 | #ifdef CONFIG_X86_64 | ||
742 | static cycle_t __vsyscall_fn vread_hpet(void) | ||
743 | { | ||
744 | return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); | ||
745 | } | ||
746 | #endif | ||
747 | |||
748 | static struct clocksource clocksource_hpet = { | 742 | static struct clocksource clocksource_hpet = { |
749 | .name = "hpet", | 743 | .name = "hpet", |
750 | .rating = 250, | 744 | .rating = 250, |
@@ -753,7 +747,7 @@ static struct clocksource clocksource_hpet = { | |||
753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 747 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
754 | .resume = hpet_resume_counter, | 748 | .resume = hpet_resume_counter, |
755 | #ifdef CONFIG_X86_64 | 749 | #ifdef CONFIG_X86_64 |
756 | .vread = vread_hpet, | 750 | .archdata = { .vclock_mode = VCLOCK_HPET }, |
757 | #endif | 751 | #endif |
758 | }; | 752 | }; |
759 | 753 | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 12aff2537682..739d8598f789 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -321,7 +321,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) | |||
321 | return tmp; | 321 | return tmp; |
322 | } | 322 | } |
323 | 323 | ||
324 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); | 324 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) |
325 | #define FP_EXP_TAG_VALID 0 | 325 | #define FP_EXP_TAG_VALID 0 |
326 | #define FP_EXP_TAG_ZERO 1 | 326 | #define FP_EXP_TAG_ZERO 1 |
327 | #define FP_EXP_TAG_SPECIAL 2 | 327 | #define FP_EXP_TAG_SPECIAL 2 |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index fb66dc9e36cb..f2b96de3c7c1 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -3,113 +3,24 @@ | |||
3 | * | 3 | * |
4 | */ | 4 | */ |
5 | #include <linux/clockchips.h> | 5 | #include <linux/clockchips.h> |
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/spinlock.h> | ||
8 | #include <linux/jiffies.h> | ||
9 | #include <linux/module.h> | 6 | #include <linux/module.h> |
10 | #include <linux/timex.h> | 7 | #include <linux/timex.h> |
11 | #include <linux/delay.h> | 8 | #include <linux/i8253.h> |
12 | #include <linux/init.h> | ||
13 | #include <linux/io.h> | ||
14 | 9 | ||
15 | #include <asm/i8253.h> | ||
16 | #include <asm/hpet.h> | 10 | #include <asm/hpet.h> |
11 | #include <asm/time.h> | ||
17 | #include <asm/smp.h> | 12 | #include <asm/smp.h> |
18 | 13 | ||
19 | DEFINE_RAW_SPINLOCK(i8253_lock); | ||
20 | EXPORT_SYMBOL(i8253_lock); | ||
21 | |||
22 | /* | 14 | /* |
23 | * HPET replaces the PIT when enabled, so we need to know which of | 15 |
24 | * the two timers is used | 16 | * the two timers is used |
25 | */ | 17 | */ |
26 | struct clock_event_device *global_clock_event; | 18 | struct clock_event_device *global_clock_event; |
27 | 19 | ||
28 | /* | ||
29 | * Initialize the PIT timer. | ||
30 | * | ||
31 | * This is also called after resume to bring the PIT into operation again. | ||
32 | */ | ||
33 | static void init_pit_timer(enum clock_event_mode mode, | ||
34 | struct clock_event_device *evt) | ||
35 | { | ||
36 | raw_spin_lock(&i8253_lock); | ||
37 | |||
38 | switch (mode) { | ||
39 | case CLOCK_EVT_MODE_PERIODIC: | ||
40 | /* binary, mode 2, LSB/MSB, ch 0 */ | ||
41 | outb_pit(0x34, PIT_MODE); | ||
42 | outb_pit(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
43 | outb_pit(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
44 | break; | ||
45 | |||
46 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
47 | case CLOCK_EVT_MODE_UNUSED: | ||
48 | if (evt->mode == CLOCK_EVT_MODE_PERIODIC || | ||
49 | evt->mode == CLOCK_EVT_MODE_ONESHOT) { | ||
50 | outb_pit(0x30, PIT_MODE); | ||
51 | outb_pit(0, PIT_CH0); | ||
52 | outb_pit(0, PIT_CH0); | ||
53 | } | ||
54 | break; | ||
55 | |||
56 | case CLOCK_EVT_MODE_ONESHOT: | ||
57 | /* One shot setup */ | ||
58 | outb_pit(0x38, PIT_MODE); | ||
59 | break; | ||
60 | |||
61 | case CLOCK_EVT_MODE_RESUME: | ||
62 | /* Nothing to do here */ | ||
63 | break; | ||
64 | } | ||
65 | raw_spin_unlock(&i8253_lock); | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Program the next event in oneshot mode | ||
70 | * | ||
71 | * Delta is given in PIT ticks | ||
72 | */ | ||
73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | ||
74 | { | ||
75 | raw_spin_lock(&i8253_lock); | ||
76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | ||
77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ | ||
78 | raw_spin_unlock(&i8253_lock); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * On UP the PIT can serve all of the possible timer functions. On SMP systems | ||
85 | * it can be solely used for the global tick. | ||
86 | * | ||
87 | * The profiling and update capabilities are switched off once the local apic is | ||
88 | * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - | ||
89 | * !using_apic_timer decisions in do_timer_interrupt_hook() | ||
90 | */ | ||
91 | static struct clock_event_device pit_ce = { | ||
92 | .name = "pit", | ||
93 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
94 | .set_mode = init_pit_timer, | ||
95 | .set_next_event = pit_next_event, | ||
96 | .irq = 0, | ||
97 | }; | ||
98 | |||
99 | /* | ||
100 | * Initialize the conversion factor and the min/max deltas of the clock event | ||
101 | * structure and register the clock event source with the framework. | ||
102 | */ | ||
103 | void __init setup_pit_timer(void) | 20 | void __init setup_pit_timer(void) |
104 | { | 21 | { |
105 | /* | 22 | clockevent_i8253_init(true); |
106 | * Start pit with the boot cpu mask and make it global after the | 23 | global_clock_event = &i8253_clockevent; |
107 | * IO_APIC has been initialized. | ||
108 | */ | ||
109 | pit_ce.cpumask = cpumask_of(smp_processor_id()); | ||
110 | |||
111 | clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF); | ||
112 | global_clock_event = &pit_ce; | ||
113 | } | 24 | } |
114 | 25 | ||
115 | #ifndef CONFIG_X86_64 | 26 | #ifndef CONFIG_X86_64 |
@@ -123,7 +34,7 @@ static int __init init_pit_clocksource(void) | |||
123 | * - when local APIC timer is active (PIT is switched off) | 34 | * - when local APIC timer is active (PIT is switched off) |
124 | */ | 35 | */ |
125 | if (num_possible_cpus() > 1 || is_hpet_enabled() || | 36 | if (num_possible_cpus() > 1 || is_hpet_enabled() || |
126 | pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) | 37 | i8253_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) |
127 | return 0; | 38 | return 0; |
128 | 39 | ||
129 | return clocksource_i8253_init(); | 40 | return clocksource_i8253_init(); |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f470e4ef993e..f09d4bbe2d2d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -272,9 +272,6 @@ static void __init apic_intr_init(void) | |||
272 | #ifdef CONFIG_X86_MCE_THRESHOLD | 272 | #ifdef CONFIG_X86_MCE_THRESHOLD |
273 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 273 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
274 | #endif | 274 | #endif |
275 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) | ||
276 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | ||
277 | #endif | ||
278 | 275 | ||
279 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 276 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
280 | /* self generated IPI for local APIC timer */ | 277 | /* self generated IPI for local APIC timer */ |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5f9ecff328b5..00354d4919a9 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -608,7 +608,7 @@ int kgdb_arch_init(void) | |||
608 | return register_die_notifier(&kgdb_notifier); | 608 | return register_die_notifier(&kgdb_notifier); |
609 | } | 609 | } |
610 | 610 | ||
611 | static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, | 611 | static void kgdb_hw_overflow_handler(struct perf_event *event, |
612 | struct perf_sample_data *data, struct pt_regs *regs) | 612 | struct perf_sample_data *data, struct pt_regs *regs) |
613 | { | 613 | { |
614 | struct task_struct *tsk = current; | 614 | struct task_struct *tsk = current; |
@@ -638,7 +638,7 @@ void kgdb_arch_late(void) | |||
638 | for (i = 0; i < HBP_NUM; i++) { | 638 | for (i = 0; i < HBP_NUM; i++) { |
639 | if (breakinfo[i].pev) | 639 | if (breakinfo[i].pev) |
640 | continue; | 640 | continue; |
641 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 641 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL); |
642 | if (IS_ERR((void * __force)breakinfo[i].pev)) { | 642 | if (IS_ERR((void * __force)breakinfo[i].pev)) { |
643 | printk(KERN_ERR "kgdb: Could not allocate hw" | 643 | printk(KERN_ERR "kgdb: Could not allocate hw" |
644 | "breakpoints\nDisabling the kernel debugger\n"); | 644 | "breakpoints\nDisabling the kernel debugger\n"); |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33c07b0b122e..a9c2116001d6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -51,6 +51,15 @@ static int parse_no_kvmapf(char *arg) | |||
51 | 51 | ||
52 | early_param("no-kvmapf", parse_no_kvmapf); | 52 | early_param("no-kvmapf", parse_no_kvmapf); |
53 | 53 | ||
54 | static int steal_acc = 1; | ||
55 | static int parse_no_stealacc(char *arg) | ||
56 | { | ||
57 | steal_acc = 0; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | early_param("no-steal-acc", parse_no_stealacc); | ||
62 | |||
54 | struct kvm_para_state { | 63 | struct kvm_para_state { |
55 | u8 mmu_queue[MMU_QUEUE_SIZE]; | 64 | u8 mmu_queue[MMU_QUEUE_SIZE]; |
56 | int mmu_queue_len; | 65 | int mmu_queue_len; |
@@ -58,6 +67,8 @@ struct kvm_para_state { | |||
58 | 67 | ||
59 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | 68 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); |
60 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); | 69 | static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); |
70 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); | ||
71 | static int has_steal_clock = 0; | ||
61 | 72 | ||
62 | static struct kvm_para_state *kvm_para_state(void) | 73 | static struct kvm_para_state *kvm_para_state(void) |
63 | { | 74 | { |
@@ -441,6 +452,21 @@ static void __init paravirt_ops_setup(void) | |||
441 | #endif | 452 | #endif |
442 | } | 453 | } |
443 | 454 | ||
455 | static void kvm_register_steal_time(void) | ||
456 | { | ||
457 | int cpu = smp_processor_id(); | ||
458 | struct kvm_steal_time *st = &per_cpu(steal_time, cpu); | ||
459 | |||
460 | if (!has_steal_clock) | ||
461 | return; | ||
462 | |||
463 | memset(st, 0, sizeof(*st)); | ||
464 | |||
465 | wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); | ||
466 | printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", | ||
467 | cpu, __pa(st)); | ||
468 | } | ||
469 | |||
444 | void __cpuinit kvm_guest_cpu_init(void) | 470 | void __cpuinit kvm_guest_cpu_init(void) |
445 | { | 471 | { |
446 | if (!kvm_para_available()) | 472 | if (!kvm_para_available()) |
@@ -457,6 +483,9 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
457 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", | 483 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", |
458 | smp_processor_id()); | 484 | smp_processor_id()); |
459 | } | 485 | } |
486 | |||
487 | if (has_steal_clock) | ||
488 | kvm_register_steal_time(); | ||
460 | } | 489 | } |
461 | 490 | ||
462 | static void kvm_pv_disable_apf(void *unused) | 491 | static void kvm_pv_disable_apf(void *unused) |
@@ -483,6 +512,31 @@ static struct notifier_block kvm_pv_reboot_nb = { | |||
483 | .notifier_call = kvm_pv_reboot_notify, | 512 | .notifier_call = kvm_pv_reboot_notify, |
484 | }; | 513 | }; |
485 | 514 | ||
515 | static u64 kvm_steal_clock(int cpu) | ||
516 | { | ||
517 | u64 steal; | ||
518 | struct kvm_steal_time *src; | ||
519 | int version; | ||
520 | |||
521 | src = &per_cpu(steal_time, cpu); | ||
522 | do { | ||
523 | version = src->version; | ||
524 | rmb(); | ||
525 | steal = src->steal; | ||
526 | rmb(); | ||
527 | } while ((version & 1) || (version != src->version)); | ||
528 | |||
529 | return steal; | ||
530 | } | ||
531 | |||
532 | void kvm_disable_steal_time(void) | ||
533 | { | ||
534 | if (!has_steal_clock) | ||
535 | return; | ||
536 | |||
537 | wrmsr(MSR_KVM_STEAL_TIME, 0, 0); | ||
538 | } | ||
539 | |||
486 | #ifdef CONFIG_SMP | 540 | #ifdef CONFIG_SMP |
487 | static void __init kvm_smp_prepare_boot_cpu(void) | 541 | static void __init kvm_smp_prepare_boot_cpu(void) |
488 | { | 542 | { |
@@ -500,6 +554,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) | |||
500 | 554 | ||
501 | static void kvm_guest_cpu_offline(void *dummy) | 555 | static void kvm_guest_cpu_offline(void *dummy) |
502 | { | 556 | { |
557 | kvm_disable_steal_time(); | ||
503 | kvm_pv_disable_apf(NULL); | 558 | kvm_pv_disable_apf(NULL); |
504 | apf_task_wake_all(); | 559 | apf_task_wake_all(); |
505 | } | 560 | } |
@@ -548,6 +603,11 @@ void __init kvm_guest_init(void) | |||
548 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) | 603 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) |
549 | x86_init.irqs.trap_init = kvm_apf_trap_init; | 604 | x86_init.irqs.trap_init = kvm_apf_trap_init; |
550 | 605 | ||
606 | if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { | ||
607 | has_steal_clock = 1; | ||
608 | pv_time_ops.steal_clock = kvm_steal_clock; | ||
609 | } | ||
610 | |||
551 | #ifdef CONFIG_SMP | 611 | #ifdef CONFIG_SMP |
552 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 612 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
553 | register_cpu_notifier(&kvm_cpu_notifier); | 613 | register_cpu_notifier(&kvm_cpu_notifier); |
@@ -555,3 +615,15 @@ void __init kvm_guest_init(void) | |||
555 | kvm_guest_cpu_init(); | 615 | kvm_guest_cpu_init(); |
556 | #endif | 616 | #endif |
557 | } | 617 | } |
618 | |||
619 | static __init int activate_jump_labels(void) | ||
620 | { | ||
621 | if (has_steal_clock) { | ||
622 | jump_label_inc(¶virt_steal_enabled); | ||
623 | if (steal_acc) | ||
624 | jump_label_inc(¶virt_steal_rq_enabled); | ||
625 | } | ||
626 | |||
627 | return 0; | ||
628 | } | ||
629 | arch_initcall(activate_jump_labels); | ||
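kvm_steal_clock() above reads a per-cpu structure that the host updates behind the guest's back, so it follows the usual even/odd version protocol: retry while the version is odd (an update is in flight) or changes between the two reads. Below is a compressed user-space model of that reader; the structure layout and the barrier macro are stand-ins for the real guest/host ABI and rmb().

/* Model of the version-based retry loop used by kvm_steal_clock(). */
#include <stdint.h>
#include <stdio.h>

struct steal_time {
	volatile uint32_t version;    /* odd while the host is updating */
	volatile uint64_t steal;      /* stolen time, in nanoseconds */
};

#define rmb() __sync_synchronize()    /* crude stand-in for a read barrier */

static uint64_t read_steal(const struct steal_time *src)
{
	uint32_t version;
	uint64_t steal;

	do {
		version = src->version;
		rmb();
		steal = src->steal;   /* may be stale if we raced; recheck */
		rmb();
	} while ((version & 1) || (version != src->version));

	return steal;
}

int main(void)
{
	struct steal_time st = { .version = 2, .steal = 123456 };

	printf("%llu\n", (unsigned long long)read_steal(&st));
	return 0;
}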
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 6389a6bca11b..c1a0188e29ae 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -160,6 +160,7 @@ static void __cpuinit kvm_setup_secondary_clock(void) | |||
160 | static void kvm_crash_shutdown(struct pt_regs *regs) | 160 | static void kvm_crash_shutdown(struct pt_regs *regs) |
161 | { | 161 | { |
162 | native_write_msr(msr_kvm_system_time, 0, 0); | 162 | native_write_msr(msr_kvm_system_time, 0, 0); |
163 | kvm_disable_steal_time(); | ||
163 | native_machine_crash_shutdown(regs); | 164 | native_machine_crash_shutdown(regs); |
164 | } | 165 | } |
165 | #endif | 166 | #endif |
@@ -167,6 +168,7 @@ static void kvm_crash_shutdown(struct pt_regs *regs) | |||
167 | static void kvm_shutdown(void) | 168 | static void kvm_shutdown(void) |
168 | { | 169 | { |
169 | native_write_msr(msr_kvm_system_time, 0, 0); | 170 | native_write_msr(msr_kvm_system_time, 0, 0); |
171 | kvm_disable_steal_time(); | ||
170 | native_machine_shutdown(); | 172 | native_machine_shutdown(); |
171 | } | 173 | } |
172 | 174 | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index c5610384ab16..591be0ee1934 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -66,8 +66,8 @@ struct microcode_amd { | |||
66 | unsigned int mpb[0]; | 66 | unsigned int mpb[0]; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | #define UCODE_CONTAINER_SECTION_HDR 8 | 69 | #define SECTION_HDR_SIZE 8 |
70 | #define UCODE_CONTAINER_HEADER_SIZE 12 | 70 | #define CONTAINER_HDR_SZ 12 |
71 | 71 | ||
72 | static struct equiv_cpu_entry *equiv_cpu_table; | 72 | static struct equiv_cpu_entry *equiv_cpu_table; |
73 | 73 | ||
@@ -157,7 +157,7 @@ static int apply_microcode_amd(int cpu) | |||
157 | static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) | 157 | static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) |
158 | { | 158 | { |
159 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 159 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
160 | unsigned int max_size, actual_size; | 160 | u32 max_size, actual_size; |
161 | 161 | ||
162 | #define F1XH_MPB_MAX_SIZE 2048 | 162 | #define F1XH_MPB_MAX_SIZE 2048 |
163 | #define F14H_MPB_MAX_SIZE 1824 | 163 | #define F14H_MPB_MAX_SIZE 1824 |
@@ -175,9 +175,9 @@ static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) | |||
175 | break; | 175 | break; |
176 | } | 176 | } |
177 | 177 | ||
178 | actual_size = buf[4] + (buf[5] << 8); | 178 | actual_size = *(u32 *)(buf + 4); |
179 | 179 | ||
180 | if (actual_size > size || actual_size > max_size) { | 180 | if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) { |
181 | pr_err("section size mismatch\n"); | 181 | pr_err("section size mismatch\n"); |
182 | return 0; | 182 | return 0; |
183 | } | 183 | } |
@@ -191,7 +191,7 @@ get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
191 | struct microcode_header_amd *mc = NULL; | 191 | struct microcode_header_amd *mc = NULL; |
192 | unsigned int actual_size = 0; | 192 | unsigned int actual_size = 0; |
193 | 193 | ||
194 | if (buf[0] != UCODE_UCODE_TYPE) { | 194 | if (*(u32 *)buf != UCODE_UCODE_TYPE) { |
195 | pr_err("invalid type field in container file section header\n"); | 195 | pr_err("invalid type field in container file section header\n"); |
196 | goto out; | 196 | goto out; |
197 | } | 197 | } |
@@ -204,8 +204,8 @@ get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
204 | if (!mc) | 204 | if (!mc) |
205 | goto out; | 205 | goto out; |
206 | 206 | ||
207 | get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size); | 207 | get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size); |
208 | *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR; | 208 | *mc_size = actual_size + SECTION_HDR_SIZE; |
209 | 209 | ||
210 | out: | 210 | out: |
211 | return mc; | 211 | return mc; |
@@ -229,9 +229,10 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
229 | return -ENOMEM; | 229 | return -ENOMEM; |
230 | } | 230 | } |
231 | 231 | ||
232 | get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size); | 232 | get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); |
233 | 233 | ||
234 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 234 | /* add header length */ |
235 | return size + CONTAINER_HDR_SZ; | ||
235 | } | 236 | } |
236 | 237 | ||
237 | static void free_equiv_cpu_table(void) | 238 | static void free_equiv_cpu_table(void) |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 52f256f2cc81..925179f871de 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -45,21 +45,6 @@ void *module_alloc(unsigned long size) | |||
45 | -1, __builtin_return_address(0)); | 45 | -1, __builtin_return_address(0)); |
46 | } | 46 | } |
47 | 47 | ||
48 | /* Free memory returned from module_alloc */ | ||
49 | void module_free(struct module *mod, void *module_region) | ||
50 | { | ||
51 | vfree(module_region); | ||
52 | } | ||
53 | |||
54 | /* We don't need anything special. */ | ||
55 | int module_frob_arch_sections(Elf_Ehdr *hdr, | ||
56 | Elf_Shdr *sechdrs, | ||
57 | char *secstrings, | ||
58 | struct module *mod) | ||
59 | { | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | #ifdef CONFIG_X86_32 | 48 | #ifdef CONFIG_X86_32 |
64 | int apply_relocate(Elf32_Shdr *sechdrs, | 49 | int apply_relocate(Elf32_Shdr *sechdrs, |
65 | const char *strtab, | 50 | const char *strtab, |
@@ -100,17 +85,6 @@ int apply_relocate(Elf32_Shdr *sechdrs, | |||
100 | } | 85 | } |
101 | return 0; | 86 | return 0; |
102 | } | 87 | } |
103 | |||
104 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
105 | const char *strtab, | ||
106 | unsigned int symindex, | ||
107 | unsigned int relsec, | ||
108 | struct module *me) | ||
109 | { | ||
110 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
111 | me->name); | ||
112 | return -ENOEXEC; | ||
113 | } | ||
114 | #else /*X86_64*/ | 88 | #else /*X86_64*/ |
115 | int apply_relocate_add(Elf64_Shdr *sechdrs, | 89 | int apply_relocate_add(Elf64_Shdr *sechdrs, |
116 | const char *strtab, | 90 | const char *strtab, |
@@ -181,17 +155,6 @@ overflow: | |||
181 | me->name); | 155 | me->name); |
182 | return -ENOEXEC; | 156 | return -ENOEXEC; |
183 | } | 157 | } |
184 | |||
185 | int apply_relocate(Elf_Shdr *sechdrs, | ||
186 | const char *strtab, | ||
187 | unsigned int symindex, | ||
188 | unsigned int relsec, | ||
189 | struct module *me) | ||
190 | { | ||
191 | printk(KERN_ERR "non add relocation not supported\n"); | ||
192 | return -ENOSYS; | ||
193 | } | ||
194 | |||
195 | #endif | 158 | #endif |
196 | 159 | ||
197 | int module_finalize(const Elf_Ehdr *hdr, | 160 | int module_finalize(const Elf_Ehdr *hdr, |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 869e1aeeb71b..613a7931ecc1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -202,6 +202,14 @@ static void native_flush_tlb_single(unsigned long addr) | |||
202 | __native_flush_tlb_single(addr); | 202 | __native_flush_tlb_single(addr); |
203 | } | 203 | } |
204 | 204 | ||
205 | struct jump_label_key paravirt_steal_enabled; | ||
206 | struct jump_label_key paravirt_steal_rq_enabled; | ||
207 | |||
208 | static u64 native_steal_clock(int cpu) | ||
209 | { | ||
210 | return 0; | ||
211 | } | ||
212 | |||
205 | /* These are in entry.S */ | 213 | /* These are in entry.S */ |
206 | extern void native_iret(void); | 214 | extern void native_iret(void); |
207 | extern void native_irq_enable_sysexit(void); | 215 | extern void native_irq_enable_sysexit(void); |
@@ -307,6 +315,7 @@ struct pv_init_ops pv_init_ops = { | |||
307 | 315 | ||
308 | struct pv_time_ops pv_time_ops = { | 316 | struct pv_time_ops pv_time_ops = { |
309 | .sched_clock = native_sched_clock, | 317 | .sched_clock = native_sched_clock, |
318 | .steal_clock = native_steal_clock, | ||
310 | }; | 319 | }; |
311 | 320 | ||
312 | struct pv_irq_ops pv_irq_ops = { | 321 | struct pv_irq_ops pv_irq_ops = { |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e8c33a302006..726494b58345 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1553,7 +1553,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) | |||
1553 | continue; | 1553 | continue; |
1554 | 1554 | ||
1555 | /* cover the whole region */ | 1555 | /* cover the whole region */ |
1556 | npages = (r->end - r->start) >> PAGE_SHIFT; | 1556 | npages = resource_size(r) >> PAGE_SHIFT; |
1557 | npages++; | 1557 | npages++; |
1558 | 1558 | ||
1559 | iommu_range_reserve(tbl, r->start, npages); | 1559 | iommu_range_reserve(tbl, r->start, npages); |
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ba0a4cce53be..63228035f9d7 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c | |||
@@ -234,7 +234,7 @@ void __init probe_roms(void) | |||
234 | /* check for extension rom (ignore length byte!) */ | 234 | /* check for extension rom (ignore length byte!) */ |
235 | rom = isa_bus_to_virt(extension_rom_resource.start); | 235 | rom = isa_bus_to_virt(extension_rom_resource.start); |
236 | if (romsignature(rom)) { | 236 | if (romsignature(rom)) { |
237 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | 237 | length = resource_size(&extension_rom_resource); |
238 | if (romchecksum(rom, length)) { | 238 | if (romchecksum(rom, length)) { |
239 | request_resource(&iomem_resource, &extension_rom_resource); | 239 | request_resource(&iomem_resource, &extension_rom_resource); |
240 | upper = extension_rom_resource.start; | 240 | upper = extension_rom_resource.start; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 807c2a2b80f1..82528799c5de 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target, | |||
528 | return ret; | 528 | return ret; |
529 | } | 529 | } |
530 | 530 | ||
531 | static void ptrace_triggered(struct perf_event *bp, int nmi, | 531 | static void ptrace_triggered(struct perf_event *bp, |
532 | struct perf_sample_data *data, | 532 | struct perf_sample_data *data, |
533 | struct pt_regs *regs) | 533 | struct pt_regs *regs) |
534 | { | 534 | { |
@@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
715 | attr.bp_type = HW_BREAKPOINT_W; | 715 | attr.bp_type = HW_BREAKPOINT_W; |
716 | attr.disabled = 1; | 716 | attr.disabled = 1; |
717 | 717 | ||
718 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); | 718 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, |
719 | NULL, tsk); | ||
719 | 720 | ||
720 | /* | 721 | /* |
721 | * CHECKME: the previous code returned -EIO if the addr wasn't | 722 | * CHECKME: the previous code returned -EIO if the addr wasn't |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 8bbe8c56916d..b78643d0f9a5 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | 11 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) |
12 | { | 12 | { |
13 | u8 config, rev; | 13 | u8 config; |
14 | u16 word; | 14 | u16 word; |
15 | 15 | ||
16 | /* BIOS may enable hardware IRQ balancing for | 16 | /* BIOS may enable hardware IRQ balancing for |
@@ -18,8 +18,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | |||
18 | * based platforms. | 18 | * based platforms. |
19 | * Disable SW irqbalance/affinity on those platforms. | 19 | * Disable SW irqbalance/affinity on those platforms. |
20 | */ | 20 | */ |
21 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 21 | if (dev->revision > 0x9) |
22 | if (rev > 0x9) | ||
23 | return; | 22 | return; |
24 | 23 | ||
25 | /* enable access to config space*/ | 24 | /* enable access to config space*/ |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0c016f727695..9242436e9937 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -294,6 +294,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
294 | DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), | 294 | DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"), |
295 | }, | 295 | }, |
296 | }, | 296 | }, |
297 | { /* Handle reboot issue on Acer Aspire one */ | ||
298 | .callback = set_bios_reboot, | ||
299 | .ident = "Acer Aspire One A110", | ||
300 | .matches = { | ||
301 | DMI_MATCH(DMI_SYS_VENDOR, "Acer"), | ||
302 | DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), | ||
303 | }, | ||
304 | }, | ||
297 | { } | 305 | { } |
298 | }; | 306 | }; |
299 | 307 | ||
@@ -411,6 +419,30 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
411 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), | 419 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), |
412 | }, | 420 | }, |
413 | }, | 421 | }, |
422 | { /* Handle problems with rebooting on the Latitude E6320. */ | ||
423 | .callback = set_pci_reboot, | ||
424 | .ident = "Dell Latitude E6320", | ||
425 | .matches = { | ||
426 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
427 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"), | ||
428 | }, | ||
429 | }, | ||
430 | { /* Handle problems with rebooting on the Latitude E5420. */ | ||
431 | .callback = set_pci_reboot, | ||
432 | .ident = "Dell Latitude E5420", | ||
433 | .matches = { | ||
434 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
435 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"), | ||
436 | }, | ||
437 | }, | ||
438 | { /* Handle problems with rebooting on the Latitude E6420. */ | ||
439 | .callback = set_pci_reboot, | ||
440 | .ident = "Dell Latitude E6420", | ||
441 | .matches = { | ||
442 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
443 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), | ||
444 | }, | ||
445 | }, | ||
414 | { } | 446 | { } |
415 | }; | 447 | }; |
416 | 448 | ||
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 41235531b11c..36818f8ec2be 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -97,6 +97,8 @@ relocate_kernel: | |||
97 | ret | 97 | ret |
98 | 98 | ||
99 | identity_mapped: | 99 | identity_mapped: |
100 | /* set return address to 0 if not preserving context */ | ||
101 | pushl $0 | ||
100 | /* store the start address on the stack */ | 102 | /* store the start address on the stack */ |
101 | pushl %edx | 103 | pushl %edx |
102 | 104 | ||
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4de8f5b3d476..7a6f3b3be3cf 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S | |||
@@ -100,6 +100,8 @@ relocate_kernel: | |||
100 | ret | 100 | ret |
101 | 101 | ||
102 | identity_mapped: | 102 | identity_mapped: |
103 | /* set return address to 0 if not preserving context */ | ||
104 | pushq $0 | ||
103 | /* store the start address on the stack */ | 105 | /* store the start address on the stack */ |
104 | pushq %rdx | 106 | pushq %rdx |
105 | 107 | ||
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 40a24932a8a1..54ddaeb221c1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -485,17 +485,18 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
485 | asmlinkage int | 485 | asmlinkage int |
486 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | 486 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) |
487 | { | 487 | { |
488 | mask &= _BLOCKABLE; | 488 | sigset_t blocked; |
489 | spin_lock_irq(&current->sighand->siglock); | 489 | |
490 | current->saved_sigmask = current->blocked; | 490 | current->saved_sigmask = current->blocked; |
491 | siginitset(&current->blocked, mask); | 491 | |
492 | recalc_sigpending(); | 492 | mask &= _BLOCKABLE; |
493 | spin_unlock_irq(&current->sighand->siglock); | 493 | siginitset(&blocked, mask); |
494 | set_current_blocked(&blocked); | ||
494 | 495 | ||
495 | current->state = TASK_INTERRUPTIBLE; | 496 | current->state = TASK_INTERRUPTIBLE; |
496 | schedule(); | 497 | schedule(); |
497 | set_restore_sigmask(); | ||
498 | 498 | ||
499 | set_restore_sigmask(); | ||
499 | return -ERESTARTNOHAND; | 500 | return -ERESTARTNOHAND; |
500 | } | 501 | } |
501 | 502 | ||
@@ -572,10 +573,7 @@ unsigned long sys_sigreturn(struct pt_regs *regs) | |||
572 | goto badframe; | 573 | goto badframe; |
573 | 574 | ||
574 | sigdelsetmask(&set, ~_BLOCKABLE); | 575 | sigdelsetmask(&set, ~_BLOCKABLE); |
575 | spin_lock_irq(&current->sighand->siglock); | 576 | set_current_blocked(&set); |
576 | current->blocked = set; | ||
577 | recalc_sigpending(); | ||
578 | spin_unlock_irq(¤t->sighand->siglock); | ||
579 | 577 | ||
580 | if (restore_sigcontext(regs, &frame->sc, &ax)) | 578 | if (restore_sigcontext(regs, &frame->sc, &ax)) |
581 | goto badframe; | 579 | goto badframe; |
@@ -653,11 +651,15 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
653 | 651 | ||
654 | static int | 652 | static int |
655 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 653 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
656 | sigset_t *set, struct pt_regs *regs) | 654 | struct pt_regs *regs) |
657 | { | 655 | { |
658 | int usig = signr_convert(sig); | 656 | int usig = signr_convert(sig); |
657 | sigset_t *set = ¤t->blocked; | ||
659 | int ret; | 658 | int ret; |
660 | 659 | ||
660 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
661 | set = &current->saved_sigmask; | ||
662 | |||
661 | /* Set up the stack frame */ | 663 | /* Set up the stack frame */ |
662 | if (is_ia32) { | 664 | if (is_ia32) { |
663 | if (ka->sa.sa_flags & SA_SIGINFO) | 665 | if (ka->sa.sa_flags & SA_SIGINFO) |
@@ -672,12 +674,13 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
672 | return -EFAULT; | 674 | return -EFAULT; |
673 | } | 675 | } |
674 | 676 | ||
677 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
675 | return ret; | 678 | return ret; |
676 | } | 679 | } |
677 | 680 | ||
678 | static int | 681 | static int |
679 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 682 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
680 | sigset_t *oldset, struct pt_regs *regs) | 683 | struct pt_regs *regs) |
681 | { | 684 | { |
682 | sigset_t blocked; | 685 | sigset_t blocked; |
683 | int ret; | 686 | int ret; |
@@ -712,20 +715,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
712 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | 715 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) |
713 | regs->flags &= ~X86_EFLAGS_TF; | 716 | regs->flags &= ~X86_EFLAGS_TF; |
714 | 717 | ||
715 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | 718 | ret = setup_rt_frame(sig, ka, info, regs); |
716 | 719 | ||
717 | if (ret) | 720 | if (ret) |
718 | return ret; | 721 | return ret; |
719 | 722 | ||
720 | #ifdef CONFIG_X86_64 | ||
721 | /* | ||
722 | * This has nothing to do with segment registers, | ||
723 | * despite the name. This magic affects uaccess.h | ||
724 | * macros' behavior. Reset it to the normal setting. | ||
725 | */ | ||
726 | set_fs(USER_DS); | ||
727 | #endif | ||
728 | |||
729 | /* | 723 | /* |
730 | * Clear the direction flag as per the ABI for function entry. | 724 | * Clear the direction flag as per the ABI for function entry. |
731 | */ | 725 | */ |
@@ -767,7 +761,6 @@ static void do_signal(struct pt_regs *regs) | |||
767 | struct k_sigaction ka; | 761 | struct k_sigaction ka; |
768 | siginfo_t info; | 762 | siginfo_t info; |
769 | int signr; | 763 | int signr; |
770 | sigset_t *oldset; | ||
771 | 764 | ||
772 | /* | 765 | /* |
773 | * We want the common case to go fast, which is why we may in certain | 766 | * We want the common case to go fast, which is why we may in certain |
@@ -779,23 +772,10 @@ static void do_signal(struct pt_regs *regs) | |||
779 | if (!user_mode(regs)) | 772 | if (!user_mode(regs)) |
780 | return; | 773 | return; |
781 | 774 | ||
782 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
783 | oldset = &current->saved_sigmask; | ||
784 | else | ||
785 | oldset = &current->blocked; | ||
786 | |||
787 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 775 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
788 | if (signr > 0) { | 776 | if (signr > 0) { |
789 | /* Whee! Actually deliver the signal. */ | 777 | /* Whee! Actually deliver the signal. */ |
790 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 778 | handle_signal(signr, &info, &ka, regs); |
791 | /* | ||
792 | * A signal was successfully delivered; the saved | ||
793 | * sigmask will have been stored in the signal frame, | ||
794 | * and will be restored by sigreturn, so we can simply | ||
795 | * clear the TS_RESTORE_SIGMASK flag. | ||
796 | */ | ||
797 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
798 | } | ||
799 | return; | 779 | return; |
800 | } | 780 | } |
801 | 781 | ||
@@ -823,7 +803,7 @@ static void do_signal(struct pt_regs *regs) | |||
823 | */ | 803 | */ |
824 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | 804 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { |
825 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | 805 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; |
826 | sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); | 806 | set_current_blocked(&current->saved_sigmask); |
827 | } | 807 | } |
828 | } | 808 | } |
829 | 809 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9fd3137230d4..9f548cb4a958 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -438,7 +438,7 @@ static void impress_friends(void) | |||
438 | void __inquire_remote_apic(int apicid) | 438 | void __inquire_remote_apic(int apicid) |
439 | { | 439 | { |
440 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 440 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
441 | char *names[] = { "ID", "VERSION", "SPIV" }; | 441 | const char * const names[] = { "ID", "VERSION", "SPIV" }; |
442 | int timeout; | 442 | int timeout; |
443 | u32 status; | 443 | u32 status; |
444 | 444 | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 55d9bc03f696..fdd0c6430e5a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace) | |||
66 | } | 66 | } |
67 | EXPORT_SYMBOL_GPL(save_stack_trace); | 67 | EXPORT_SYMBOL_GPL(save_stack_trace); |
68 | 68 | ||
69 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) | 69 | void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) |
70 | { | 70 | { |
71 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); | 71 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); |
72 | if (trace->nr_entries < trace->max_entries) | 72 | if (trace->nr_entries < trace->max_entries) |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 30ac65df7d4e..e07a2fc876b9 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/bootparam.h> | 36 | #include <asm/bootparam.h> |
37 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
38 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
39 | #include <asm/swiotlb.h> | ||
39 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
40 | #include <asm/proto.h> | 41 | #include <asm/proto.h> |
41 | #include <asm/setup.h> | 42 | #include <asm/setup.h> |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 00cbb272627f..5a64d057be57 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -11,13 +11,13 @@ | |||
11 | 11 | ||
12 | #include <linux/clockchips.h> | 12 | #include <linux/clockchips.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/i8253.h> | ||
14 | #include <linux/time.h> | 15 | #include <linux/time.h> |
15 | #include <linux/mca.h> | 16 | #include <linux/mca.h> |
16 | 17 | ||
17 | #include <asm/vsyscall.h> | 18 | #include <asm/vsyscall.h> |
18 | #include <asm/x86_init.h> | 19 | #include <asm/x86_init.h> |
19 | #include <asm/i8259.h> | 20 | #include <asm/i8259.h> |
20 | #include <asm/i8253.h> | ||
21 | #include <asm/timer.h> | 21 | #include <asm/timer.h> |
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b67166f9de..fbc097a085ca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -872,6 +872,12 @@ void __init trap_init(void) | |||
872 | set_bit(SYSCALL_VECTOR, used_vectors); | 872 | set_bit(SYSCALL_VECTOR, used_vectors); |
873 | #endif | 873 | #endif |
874 | 874 | ||
875 | #ifdef CONFIG_X86_64 | ||
876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
879 | #endif | ||
880 | |||
875 | /* | 881 | /* |
876 | * Should be a barrier for any external CPU state: | 882 | * Should be a barrier for any external CPU state: |
877 | */ | 883 | */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6cc6922262af..db483369f10b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/timer.h> | 5 | #include <linux/timer.h> |
6 | #include <linux/acpi_pmtmr.h> | 6 | #include <linux/acpi_pmtmr.h> |
7 | #include <linux/cpufreq.h> | 7 | #include <linux/cpufreq.h> |
8 | #include <linux/dmi.h> | ||
9 | #include <linux/delay.h> | 8 | #include <linux/delay.h> |
10 | #include <linux/clocksource.h> | 9 | #include <linux/clocksource.h> |
11 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
@@ -777,7 +776,7 @@ static struct clocksource clocksource_tsc = { | |||
777 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 776 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
778 | CLOCK_SOURCE_MUST_VERIFY, | 777 | CLOCK_SOURCE_MUST_VERIFY, |
779 | #ifdef CONFIG_X86_64 | 778 | #ifdef CONFIG_X86_64 |
780 | .vread = vread_tsc, | 779 | .archdata = { .vclock_mode = VCLOCK_TSC }, |
781 | #endif | 780 | #endif |
782 | }; | 781 | }; |
783 | 782 | ||
@@ -800,27 +799,6 @@ void mark_tsc_unstable(char *reason) | |||
800 | 799 | ||
801 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | 800 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); |
802 | 801 | ||
803 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) | ||
804 | { | ||
805 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | ||
806 | d->ident); | ||
807 | tsc_unstable = 1; | ||
808 | return 0; | ||
809 | } | ||
810 | |||
811 | /* List of systems that have known TSC problems */ | ||
812 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | ||
813 | { | ||
814 | .callback = dmi_mark_tsc_unstable, | ||
815 | .ident = "IBM Thinkpad 380XD", | ||
816 | .matches = { | ||
817 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
818 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | ||
819 | }, | ||
820 | }, | ||
821 | {} | ||
822 | }; | ||
823 | |||
824 | static void __init check_system_tsc_reliable(void) | 802 | static void __init check_system_tsc_reliable(void) |
825 | { | 803 | { |
826 | #ifdef CONFIG_MGEODE_LX | 804 | #ifdef CONFIG_MGEODE_LX |
@@ -1010,8 +988,6 @@ void __init tsc_init(void) | |||
1010 | lpj_fine = lpj; | 988 | lpj_fine = lpj; |
1011 | 989 | ||
1012 | use_tsc_delay(); | 990 | use_tsc_delay(); |
1013 | /* Check and install the TSC clocksource */ | ||
1014 | dmi_check_system(bad_tsc_dmi_table); | ||
1015 | 991 | ||
1016 | if (unsynchronized_tsc()) | 992 | if (unsynchronized_tsc()) |
1017 | mark_tsc_unstable("TSCs unsynchronized"); | 993 | mark_tsc_unstable("TSCs unsynchronized"); |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 89aed99aafce..4aa9c54a9b76 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -161,50 +161,47 @@ SECTIONS | |||
161 | 161 | ||
162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) | 162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) |
163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | 163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) |
164 | #define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \ | ||
165 | ADDR(.vsyscall_0) + offset \ | ||
166 | : AT(VLOAD(.vsyscall_var_ ## x)) { \ | ||
167 | *(.vsyscall_var_ ## x) \ | ||
168 | } \ | ||
169 | x = VVIRT(.vsyscall_var_ ## x); | ||
170 | 164 | ||
171 | . = ALIGN(4096); | 165 | . = ALIGN(4096); |
172 | __vsyscall_0 = .; | 166 | __vsyscall_0 = .; |
173 | 167 | ||
174 | . = VSYSCALL_ADDR; | 168 | . = VSYSCALL_ADDR; |
175 | .vsyscall_0 : AT(VLOAD(.vsyscall_0)) { | 169 | .vsyscall : AT(VLOAD(.vsyscall)) { |
176 | *(.vsyscall_0) | 170 | *(.vsyscall_0) |
177 | } :user | ||
178 | 171 | ||
179 | . = ALIGN(L1_CACHE_BYTES); | 172 | . = 1024; |
180 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | ||
181 | *(.vsyscall_fn) | ||
182 | } | ||
183 | |||
184 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { | ||
185 | *(.vsyscall_1) | 173 | *(.vsyscall_1) |
186 | } | ||
187 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { | ||
188 | *(.vsyscall_2) | ||
189 | } | ||
190 | 174 | ||
191 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { | 175 | . = 2048; |
192 | *(.vsyscall_3) | 176 | *(.vsyscall_2) |
193 | } | ||
194 | |||
195 | #define __VVAR_KERNEL_LDS | ||
196 | #include <asm/vvar.h> | ||
197 | #undef __VVAR_KERNEL_LDS | ||
198 | 177 | ||
199 | . = __vsyscall_0 + PAGE_SIZE; | 178 | . = 4096; /* Pad the whole page. */ |
179 | } :user =0xcc | ||
180 | . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); | ||
200 | 181 | ||
201 | #undef VSYSCALL_ADDR | 182 | #undef VSYSCALL_ADDR |
202 | #undef VLOAD_OFFSET | 183 | #undef VLOAD_OFFSET |
203 | #undef VLOAD | 184 | #undef VLOAD |
204 | #undef VVIRT_OFFSET | 185 | #undef VVIRT_OFFSET |
205 | #undef VVIRT | 186 | #undef VVIRT |
187 | |||
188 | __vvar_page = .; | ||
189 | |||
190 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { | ||
191 | |||
192 | /* Place all vvars at the offsets in asm/vvar.h. */ | ||
193 | #define EMIT_VVAR(name, offset) \ | ||
194 | . = offset; \ | ||
195 | *(.vvar_ ## name) | ||
196 | #define __VVAR_KERNEL_LDS | ||
197 | #include <asm/vvar.h> | ||
198 | #undef __VVAR_KERNEL_LDS | ||
206 | #undef EMIT_VVAR | 199 | #undef EMIT_VVAR |
207 | 200 | ||
201 | } :data | ||
202 | |||
203 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); | ||
204 | |||
208 | #endif /* CONFIG_X86_64 */ | 205 | #endif /* CONFIG_X86_64 */ |
209 | 206 | ||
210 | /* Init code and data - will be freed after init */ | 207 | /* Init code and data - will be freed after init */ |
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c deleted file mode 100644 index a81aa9e9894c..000000000000 --- a/arch/x86/kernel/vread_tsc_64.c +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | /* This code runs in userspace. */ | ||
2 | |||
3 | #define DISABLE_BRANCH_PROFILING | ||
4 | #include <asm/vgtod.h> | ||
5 | |||
6 | notrace cycle_t __vsyscall_fn vread_tsc(void) | ||
7 | { | ||
8 | cycle_t ret; | ||
9 | u64 last; | ||
10 | |||
11 | /* | ||
12 | * Empirically, a fence (of type that depends on the CPU) | ||
13 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
14 | * with respect to loads. The various CPU manuals are unclear | ||
15 | * as to whether rdtsc can be reordered with later loads, | ||
16 | * but no one has ever seen it happen. | ||
17 | */ | ||
18 | rdtsc_barrier(); | ||
19 | ret = (cycle_t)vget_cycles(); | ||
20 | |||
21 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
22 | |||
23 | if (likely(ret >= last)) | ||
24 | return ret; | ||
25 | |||
26 | /* | ||
27 | * GCC likes to generate cmov here, but this branch is extremely | ||
28 | predictable (it's just a function of time and the likely is | ||
29 | * very likely) and there's a data dependence, so force GCC | ||
30 | * to generate a branch instead. I don't barrier() because | ||
31 | * we don't actually need a barrier, and if this function | ||
32 | * ever gets inlined it will generate worse code. | ||
33 | */ | ||
34 | asm volatile (""); | ||
35 | return last; | ||
36 | } | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3e682184d76c..dda7dff9cef7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE | 2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE |
3 | * Copyright 2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2003 Andi Kleen, SuSE Labs. |
4 | * | 4 | * |
5 | * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] | ||
6 | * | ||
5 | * Thanks to hpa@transmeta.com for some useful hint. | 7 | * Thanks to hpa@transmeta.com for some useful hint. |
6 | * Special thanks to Ingo Molnar for his early experience with | 8 | * Special thanks to Ingo Molnar for his early experience with |
7 | * a different vsyscall implementation for Linux/IA32 and for the name. | 9 | * a different vsyscall implementation for Linux/IA32 and for the name. |
@@ -11,10 +13,9 @@ | |||
11 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid | 13 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid |
12 | * jumping out of line if necessary. We cannot add more with this | 14 | * jumping out of line if necessary. We cannot add more with this |
13 | * mechanism because older kernels won't return -ENOSYS. | 15 | * mechanism because older kernels won't return -ENOSYS. |
14 | * If we want more than four we need a vDSO. | ||
15 | * | 16 | * |
16 | * Note: the concept clashes with user mode linux. If you use UML and | 17 | * Note: the concept clashes with user mode linux. UML users should |
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 18 | * use the vDSO. |
18 | */ | 19 | */ |
19 | 20 | ||
20 | /* Disable profiling for userspace code: */ | 21 | /* Disable profiling for userspace code: */ |
@@ -32,9 +33,12 @@ | |||
32 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
33 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
34 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/syscalls.h> | ||
37 | #include <linux/ratelimit.h> | ||
35 | 38 | ||
36 | #include <asm/vsyscall.h> | 39 | #include <asm/vsyscall.h> |
37 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
41 | #include <asm/compat.h> | ||
38 | #include <asm/page.h> | 42 | #include <asm/page.h> |
39 | #include <asm/unistd.h> | 43 | #include <asm/unistd.h> |
40 | #include <asm/fixmap.h> | 44 | #include <asm/fixmap.h> |
@@ -44,16 +48,12 @@ | |||
44 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
45 | #include <asm/topology.h> | 49 | #include <asm/topology.h> |
46 | #include <asm/vgtod.h> | 50 | #include <asm/vgtod.h> |
47 | 51 | #include <asm/traps.h> | |
48 | #define __vsyscall(nr) \ | ||
49 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | ||
50 | #define __syscall_clobber "r11","cx","memory" | ||
51 | 52 | ||
52 | DEFINE_VVAR(int, vgetcpu_mode); | 53 | DEFINE_VVAR(int, vgetcpu_mode); |
53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 54 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
54 | { | 55 | { |
55 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
56 | .sysctl_enabled = 1, | ||
57 | }; | 57 | }; |
58 | 58 | ||
59 | void update_vsyscall_tz(void) | 59 | void update_vsyscall_tz(void) |
@@ -72,179 +72,149 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
72 | unsigned long flags; | 72 | unsigned long flags; |
73 | 73 | ||
74 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | 74 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
75 | |||
75 | /* copy vsyscall data */ | 76 | /* copy vsyscall data */ |
76 | vsyscall_gtod_data.clock.vread = clock->vread; | 77 | vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; |
77 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 78 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
78 | vsyscall_gtod_data.clock.mask = clock->mask; | 79 | vsyscall_gtod_data.clock.mask = clock->mask; |
79 | vsyscall_gtod_data.clock.mult = mult; | 80 | vsyscall_gtod_data.clock.mult = mult; |
80 | vsyscall_gtod_data.clock.shift = clock->shift; | 81 | vsyscall_gtod_data.clock.shift = clock->shift; |
81 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 82 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
82 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 83 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
83 | vsyscall_gtod_data.wall_to_monotonic = *wtm; | 84 | vsyscall_gtod_data.wall_to_monotonic = *wtm; |
84 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 85 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); |
86 | |||
85 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 87 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
86 | } | 88 | } |
87 | 89 | ||
88 | /* RED-PEN may want to readd seq locking, but then the variable should be | 90 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
89 | * write-once. | 91 | const char *message) |
90 | */ | ||
91 | static __always_inline void do_get_tz(struct timezone * tz) | ||
92 | { | 92 | { |
93 | *tz = VVAR(vsyscall_gtod_data).sys_tz; | 93 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); |
94 | } | 94 | struct task_struct *tsk; |
95 | 95 | ||
96 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 96 | if (!show_unhandled_signals || !__ratelimit(&rs)) |
97 | { | 97 | return; |
98 | int ret; | ||
99 | asm volatile("syscall" | ||
100 | : "=a" (ret) | ||
101 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | ||
102 | : __syscall_clobber ); | ||
103 | return ret; | ||
104 | } | ||
105 | 98 | ||
106 | static __always_inline long time_syscall(long *t) | 99 | tsk = current; |
107 | { | ||
108 | long secs; | ||
109 | asm volatile("syscall" | ||
110 | : "=a" (secs) | ||
111 | : "0" (__NR_time),"D" (t) : __syscall_clobber); | ||
112 | return secs; | ||
113 | } | ||
114 | 100 | ||
115 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 101 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
116 | { | 102 | level, tsk->comm, task_pid_nr(tsk), |
117 | cycle_t now, base, mask, cycle_delta; | 103 | message, regs->ip - 2, regs->cs, |
118 | unsigned seq; | 104 | regs->sp, regs->ax, regs->si, regs->di); |
119 | unsigned long mult, shift, nsec; | ||
120 | cycle_t (*vread)(void); | ||
121 | do { | ||
122 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | ||
123 | |||
124 | vread = VVAR(vsyscall_gtod_data).clock.vread; | ||
125 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled || | ||
126 | !vread)) { | ||
127 | gettimeofday(tv,NULL); | ||
128 | return; | ||
129 | } | ||
130 | |||
131 | now = vread(); | ||
132 | base = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
133 | mask = VVAR(vsyscall_gtod_data).clock.mask; | ||
134 | mult = VVAR(vsyscall_gtod_data).clock.mult; | ||
135 | shift = VVAR(vsyscall_gtod_data).clock.shift; | ||
136 | |||
137 | tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec; | ||
138 | nsec = VVAR(vsyscall_gtod_data).wall_time_nsec; | ||
139 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | ||
140 | |||
141 | /* calculate interval: */ | ||
142 | cycle_delta = (now - base) & mask; | ||
143 | /* convert to nsecs: */ | ||
144 | nsec += (cycle_delta * mult) >> shift; | ||
145 | |||
146 | while (nsec >= NSEC_PER_SEC) { | ||
147 | tv->tv_sec += 1; | ||
148 | nsec -= NSEC_PER_SEC; | ||
149 | } | ||
150 | tv->tv_usec = nsec / NSEC_PER_USEC; | ||
151 | } | 105 | } |
152 | 106 | ||
153 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 107 | static int addr_to_vsyscall_nr(unsigned long addr) |
154 | { | 108 | { |
155 | if (tv) | 109 | int nr; |
156 | do_vgettimeofday(tv); | ||
157 | if (tz) | ||
158 | do_get_tz(tz); | ||
159 | return 0; | ||
160 | } | ||
161 | 110 | ||
162 | /* This will break when the xtime seconds get inaccurate, but that is | 111 | if ((addr & ~0xC00UL) != VSYSCALL_START) |
163 | * unlikely */ | 112 | return -EINVAL; |
164 | time_t __vsyscall(1) vtime(time_t *t) | ||
165 | { | ||
166 | unsigned seq; | ||
167 | time_t result; | ||
168 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) | ||
169 | return time_syscall(t); | ||
170 | 113 | ||
171 | do { | 114 | nr = (addr & 0xC00UL) >> 10; |
172 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | 115 | if (nr >= 3) |
116 | return -EINVAL; | ||
173 | 117 | ||
174 | result = VVAR(vsyscall_gtod_data).wall_time_sec; | 118 | return nr; |
119 | } | ||
175 | 120 | ||
176 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | 121 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) |
122 | { | ||
123 | struct task_struct *tsk; | ||
124 | unsigned long caller; | ||
125 | int vsyscall_nr; | ||
126 | long ret; | ||
127 | |||
128 | local_irq_enable(); | ||
129 | |||
130 | /* | ||
131 | * Real 64-bit user mode code has cs == __USER_CS. Anything else | ||
132 | * is bogus. | ||
133 | */ | ||
134 | if (regs->cs != __USER_CS) { | ||
135 | /* | ||
136 | * If we trapped from kernel mode, we might as well OOPS now | ||
137 | * instead of returning to some random address and OOPSing | ||
138 | * then. | ||
139 | */ | ||
140 | BUG_ON(!user_mode(regs)); | ||
141 | |||
142 | /* Compat mode and non-compat 32-bit CS should both segfault. */ | ||
143 | warn_bad_vsyscall(KERN_WARNING, regs, | ||
144 | "illegal int 0xcc from 32-bit mode"); | ||
145 | goto sigsegv; | ||
146 | } | ||
177 | 147 | ||
178 | if (t) | 148 | /* |
179 | *t = result; | 149 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, |
180 | return result; | 150 | * and int 0xcc is two bytes long. |
181 | } | 151 | */ |
152 | vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2); | ||
153 | if (vsyscall_nr < 0) { | ||
154 | warn_bad_vsyscall(KERN_WARNING, regs, | ||
155 | "illegal int 0xcc (exploit attempt?)"); | ||
156 | goto sigsegv; | ||
157 | } | ||
182 | 158 | ||
183 | /* Fast way to get current CPU and node. | 159 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
184 | This helps to do per node and per CPU caches in user space. | 160 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); |
185 | The result is not guaranteed without CPU affinity, but usually | 161 | goto sigsegv; |
186 | works out because the scheduler tries to keep a thread on the same | 162 | } |
187 | CPU. | ||
188 | 163 | ||
189 | tcache must point to a two element sized long array. | 164 | tsk = current; |
190 | All arguments can be NULL. */ | 165 | if (seccomp_mode(&tsk->seccomp)) |
191 | long __vsyscall(2) | 166 | do_exit(SIGKILL); |
192 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | 167 | |
193 | { | 168 | switch (vsyscall_nr) { |
194 | unsigned int p; | 169 | case 0: |
195 | unsigned long j = 0; | 170 | ret = sys_gettimeofday( |
196 | 171 | (struct timeval __user *)regs->di, | |
197 | /* Fast cache - only recompute value once per jiffies and avoid | 172 | (struct timezone __user *)regs->si); |
198 | relatively costly rdtscp/cpuid otherwise. | 173 | break; |
199 | This works because the scheduler usually keeps the process | 174 | |
200 | on the same CPU and this syscall doesn't guarantee its | 175 | case 1: |
201 | results anyways. | 176 | ret = sys_time((time_t __user *)regs->di); |
202 | We do this here because otherwise user space would do it on | 177 | break; |
203 | its own in a likely inferior way (no access to jiffies). | 178 | |
204 | If you don't like it pass NULL. */ | 179 | case 2: |
205 | if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) { | 180 | ret = sys_getcpu((unsigned __user *)regs->di, |
206 | p = tcache->blob[1]; | 181 | (unsigned __user *)regs->si, |
207 | } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { | 182 | 0); |
208 | /* Load per CPU data from RDTSCP */ | 183 | break; |
209 | native_read_tscp(&p); | ||
210 | } else { | ||
211 | /* Load per CPU data from GDT */ | ||
212 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
213 | } | 184 | } |
214 | if (tcache) { | 185 | |
215 | tcache->blob[0] = j; | 186 | if (ret == -EFAULT) { |
216 | tcache->blob[1] = p; | 187 | /* |
188 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
189 | * | ||
190 | * With a real vsyscall, that would have caused SIGSEGV. | ||
191 | * To make writing reliable exploits using the emulated | ||
192 | * vsyscalls harder, generate SIGSEGV here as well. | ||
193 | */ | ||
194 | warn_bad_vsyscall(KERN_INFO, regs, | ||
195 | "vsyscall fault (exploit attempt?)"); | ||
196 | goto sigsegv; | ||
217 | } | 197 | } |
218 | if (cpu) | ||
219 | *cpu = p & 0xfff; | ||
220 | if (node) | ||
221 | *node = p >> 12; | ||
222 | return 0; | ||
223 | } | ||
224 | 198 | ||
225 | static long __vsyscall(3) venosys_1(void) | 199 | regs->ax = ret; |
226 | { | ||
227 | return -ENOSYS; | ||
228 | } | ||
229 | 200 | ||
230 | #ifdef CONFIG_SYSCTL | 201 | /* Emulate a ret instruction. */ |
231 | static ctl_table kernel_table2[] = { | 202 | regs->ip = caller; |
232 | { .procname = "vsyscall64", | 203 | regs->sp += 8; |
233 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), | ||
234 | .mode = 0644, | ||
235 | .proc_handler = proc_dointvec }, | ||
236 | {} | ||
237 | }; | ||
238 | 204 | ||
239 | static ctl_table kernel_root_table2[] = { | 205 | local_irq_disable(); |
240 | { .procname = "kernel", .mode = 0555, | 206 | return; |
241 | .child = kernel_table2 }, | 207 | |
242 | {} | 208 | sigsegv: |
243 | }; | 209 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ |
244 | #endif | 210 | force_sig(SIGSEGV, current); |
211 | local_irq_disable(); | ||
212 | } | ||
245 | 213 | ||
246 | /* Assume __initcall executes before all user space. Hopefully kmod | 214 | /* |
247 | doesn't violate that. We'll find out if it does. */ | 215 | * Assume __initcall executes before all user space. Hopefully kmod |
216 | * doesn't violate that. We'll find out if it does. | ||
217 | */ | ||
248 | static void __cpuinit vsyscall_set_cpu(int cpu) | 218 | static void __cpuinit vsyscall_set_cpu(int cpu) |
249 | { | 219 | { |
250 | unsigned long d; | 220 | unsigned long d; |
@@ -255,13 +225,15 @@ static void __cpuinit vsyscall_set_cpu(int cpu) | |||
255 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | 225 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) |
256 | write_rdtscp_aux((node << 12) | cpu); | 226 | write_rdtscp_aux((node << 12) | cpu); |
257 | 227 | ||
258 | /* Store cpu number in limit so that it can be loaded quickly | 228 | /* |
259 | in user space in vgetcpu. | 229 | * Store cpu number in limit so that it can be loaded quickly |
260 | 12 bits for the CPU and 8 bits for the node. */ | 230 | * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node) |
231 | */ | ||
261 | d = 0x0f40000000000ULL; | 232 | d = 0x0f40000000000ULL; |
262 | d |= cpu; | 233 | d |= cpu; |
263 | d |= (node & 0xf) << 12; | 234 | d |= (node & 0xf) << 12; |
264 | d |= (node >> 4) << 48; | 235 | d |= (node >> 4) << 48; |
236 | |||
265 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | 237 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); |
266 | } | 238 | } |
267 | 239 | ||
@@ -275,8 +247,10 @@ static int __cpuinit | |||
275 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | 247 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
276 | { | 248 | { |
277 | long cpu = (long)arg; | 249 | long cpu = (long)arg; |
250 | |||
278 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 251 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
279 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); | 252 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); |
253 | |||
280 | return NOTIFY_DONE; | 254 | return NOTIFY_DONE; |
281 | } | 255 | } |
282 | 256 | ||
@@ -284,25 +258,23 @@ void __init map_vsyscall(void) | |||
284 | { | 258 | { |
285 | extern char __vsyscall_0; | 259 | extern char __vsyscall_0; |
286 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | 260 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); |
261 | extern char __vvar_page; | ||
262 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); | ||
287 | 263 | ||
288 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 264 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ |
289 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 265 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); |
266 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | ||
267 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); | ||
290 | } | 268 | } |
291 | 269 | ||
292 | static int __init vsyscall_init(void) | 270 | static int __init vsyscall_init(void) |
293 | { | 271 | { |
294 | BUG_ON(((unsigned long) &vgettimeofday != | 272 | BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); |
295 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 273 | |
296 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | ||
297 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | ||
298 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
299 | #ifdef CONFIG_SYSCTL | ||
300 | register_sysctl_table(kernel_root_table2); | ||
301 | #endif | ||
302 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 274 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
303 | /* notifier priority > KVM */ | 275 | /* notifier priority > KVM */ |
304 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | 276 | hotcpu_notifier(cpu_vsyscall_notifier, 30); |
277 | |||
305 | return 0; | 278 | return 0; |
306 | } | 279 | } |
307 | |||
308 | __initcall(vsyscall_init); | 280 | __initcall(vsyscall_init); |
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S new file mode 100644 index 000000000000..ffa845eae5ca --- /dev/null +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * vsyscall_emu_64.S: Vsyscall emulation page | ||
3 | * | ||
4 | * Copyright (c) 2011 Andy Lutomirski | ||
5 | * | ||
6 | * Subject to the GNU General Public License, version 2 | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/irq_vectors.h> | ||
11 | |||
12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | ||
13 | |||
14 | .section .vsyscall_0, "a" | ||
15 | ENTRY(vsyscall_0) | ||
16 | int $VSYSCALL_EMU_VECTOR | ||
17 | END(vsyscall_0) | ||
18 | |||
19 | .section .vsyscall_1, "a" | ||
20 | ENTRY(vsyscall_1) | ||
21 | int $VSYSCALL_EMU_VECTOR | ||
22 | END(vsyscall_1) | ||
23 | |||
24 | .section .vsyscall_2, "a" | ||
25 | ENTRY(vsyscall_2) | ||
26 | int $VSYSCALL_EMU_VECTOR | ||
27 | END(vsyscall_2) | ||
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 50f63648ce1b..988724b236b6 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -31,6 +31,7 @@ config KVM | |||
31 | select KVM_ASYNC_PF | 31 | select KVM_ASYNC_PF |
32 | select USER_RETURN_NOTIFIER | 32 | select USER_RETURN_NOTIFIER |
33 | select KVM_MMIO | 33 | select KVM_MMIO |
34 | select TASK_DELAY_ACCT | ||
34 | ---help--- | 35 | ---help--- |
35 | Support hosting fully virtualized guest machines using hardware | 36 | Support hosting fully virtualized guest machines using hardware |
36 | virtualization extensions. You will need a fairly recent | 37 | virtualization extensions. You will need a fairly recent |
@@ -76,6 +77,5 @@ config KVM_MMU_AUDIT | |||
76 | # the virtualization menu. | 77 | # the virtualization menu. |
77 | source drivers/vhost/Kconfig | 78 | source drivers/vhost/Kconfig |
78 | source drivers/lguest/Kconfig | 79 | source drivers/lguest/Kconfig |
79 | source drivers/virtio/Kconfig | ||
80 | 80 | ||
81 | endif # VIRTUALIZATION | 81 | endif # VIRTUALIZATION |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index adc98675cda0..6f08bc940fa8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -407,76 +407,59 @@ struct gprefix { | |||
407 | } \ | 407 | } \ |
408 | } while (0) | 408 | } while (0) |
409 | 409 | ||
410 | /* Fetch next part of the instruction being emulated. */ | ||
411 | #define insn_fetch(_type, _size, _eip) \ | ||
412 | ({ unsigned long _x; \ | ||
413 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | ||
414 | if (rc != X86EMUL_CONTINUE) \ | ||
415 | goto done; \ | ||
416 | (_eip) += (_size); \ | ||
417 | (_type)_x; \ | ||
418 | }) | ||
419 | |||
420 | #define insn_fetch_arr(_arr, _size, _eip) \ | ||
421 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | ||
422 | if (rc != X86EMUL_CONTINUE) \ | ||
423 | goto done; \ | ||
424 | (_eip) += (_size); \ | ||
425 | }) | ||
426 | |||
427 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, | 410 | static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, |
428 | enum x86_intercept intercept, | 411 | enum x86_intercept intercept, |
429 | enum x86_intercept_stage stage) | 412 | enum x86_intercept_stage stage) |
430 | { | 413 | { |
431 | struct x86_instruction_info info = { | 414 | struct x86_instruction_info info = { |
432 | .intercept = intercept, | 415 | .intercept = intercept, |
433 | .rep_prefix = ctxt->decode.rep_prefix, | 416 | .rep_prefix = ctxt->rep_prefix, |
434 | .modrm_mod = ctxt->decode.modrm_mod, | 417 | .modrm_mod = ctxt->modrm_mod, |
435 | .modrm_reg = ctxt->decode.modrm_reg, | 418 | .modrm_reg = ctxt->modrm_reg, |
436 | .modrm_rm = ctxt->decode.modrm_rm, | 419 | .modrm_rm = ctxt->modrm_rm, |
437 | .src_val = ctxt->decode.src.val64, | 420 | .src_val = ctxt->src.val64, |
438 | .src_bytes = ctxt->decode.src.bytes, | 421 | .src_bytes = ctxt->src.bytes, |
439 | .dst_bytes = ctxt->decode.dst.bytes, | 422 | .dst_bytes = ctxt->dst.bytes, |
440 | .ad_bytes = ctxt->decode.ad_bytes, | 423 | .ad_bytes = ctxt->ad_bytes, |
441 | .next_rip = ctxt->eip, | 424 | .next_rip = ctxt->eip, |
442 | }; | 425 | }; |
443 | 426 | ||
444 | return ctxt->ops->intercept(ctxt, &info, stage); | 427 | return ctxt->ops->intercept(ctxt, &info, stage); |
445 | } | 428 | } |
446 | 429 | ||
447 | static inline unsigned long ad_mask(struct decode_cache *c) | 430 | static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) |
448 | { | 431 | { |
449 | return (1UL << (c->ad_bytes << 3)) - 1; | 432 | return (1UL << (ctxt->ad_bytes << 3)) - 1; |
450 | } | 433 | } |
451 | 434 | ||
452 | /* Access/update address held in a register, based on addressing mode. */ | 435 | /* Access/update address held in a register, based on addressing mode. */ |
453 | static inline unsigned long | 436 | static inline unsigned long |
454 | address_mask(struct decode_cache *c, unsigned long reg) | 437 | address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg) |
455 | { | 438 | { |
456 | if (c->ad_bytes == sizeof(unsigned long)) | 439 | if (ctxt->ad_bytes == sizeof(unsigned long)) |
457 | return reg; | 440 | return reg; |
458 | else | 441 | else |
459 | return reg & ad_mask(c); | 442 | return reg & ad_mask(ctxt); |
460 | } | 443 | } |
461 | 444 | ||
462 | static inline unsigned long | 445 | static inline unsigned long |
463 | register_address(struct decode_cache *c, unsigned long reg) | 446 | register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) |
464 | { | 447 | { |
465 | return address_mask(c, reg); | 448 | return address_mask(ctxt, reg); |
466 | } | 449 | } |
467 | 450 | ||
468 | static inline void | 451 | static inline void |
469 | register_address_increment(struct decode_cache *c, unsigned long *reg, int inc) | 452 | register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) |
470 | { | 453 | { |
471 | if (c->ad_bytes == sizeof(unsigned long)) | 454 | if (ctxt->ad_bytes == sizeof(unsigned long)) |
472 | *reg += inc; | 455 | *reg += inc; |
473 | else | 456 | else |
474 | *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c)); | 457 | *reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt)); |
475 | } | 458 | } |
476 | 459 | ||
477 | static inline void jmp_rel(struct decode_cache *c, int rel) | 460 | static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) |
478 | { | 461 | { |
479 | register_address_increment(c, &c->eip, rel); | 462 | register_address_increment(ctxt, &ctxt->_eip, rel); |
480 | } | 463 | } |
481 | 464 | ||
482 | static u32 desc_limit_scaled(struct desc_struct *desc) | 465 | static u32 desc_limit_scaled(struct desc_struct *desc) |
@@ -486,28 +469,26 @@ static u32 desc_limit_scaled(struct desc_struct *desc) | |||
486 | return desc->g ? (limit << 12) | 0xfff : limit; | 469 | return desc->g ? (limit << 12) | 0xfff : limit; |
487 | } | 470 | } |
488 | 471 | ||
489 | static void set_seg_override(struct decode_cache *c, int seg) | 472 | static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg) |
490 | { | 473 | { |
491 | c->has_seg_override = true; | 474 | ctxt->has_seg_override = true; |
492 | c->seg_override = seg; | 475 | ctxt->seg_override = seg; |
493 | } | 476 | } |
494 | 477 | ||
495 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, | 478 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) |
496 | struct x86_emulate_ops *ops, int seg) | ||
497 | { | 479 | { |
498 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 480 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
499 | return 0; | 481 | return 0; |
500 | 482 | ||
501 | return ops->get_cached_segment_base(ctxt, seg); | 483 | return ctxt->ops->get_cached_segment_base(ctxt, seg); |
502 | } | 484 | } |
503 | 485 | ||
504 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt, | 486 | static unsigned seg_override(struct x86_emulate_ctxt *ctxt) |
505 | struct decode_cache *c) | ||
506 | { | 487 | { |
507 | if (!c->has_seg_override) | 488 | if (!ctxt->has_seg_override) |
508 | return 0; | 489 | return 0; |
509 | 490 | ||
510 | return c->seg_override; | 491 | return ctxt->seg_override; |
511 | } | 492 | } |
512 | 493 | ||
513 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | 494 | static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, |
@@ -579,7 +560,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
579 | unsigned size, bool write, bool fetch, | 560 | unsigned size, bool write, bool fetch, |
580 | ulong *linear) | 561 | ulong *linear) |
581 | { | 562 | { |
582 | struct decode_cache *c = &ctxt->decode; | ||
583 | struct desc_struct desc; | 563 | struct desc_struct desc; |
584 | bool usable; | 564 | bool usable; |
585 | ulong la; | 565 | ulong la; |
@@ -587,7 +567,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
587 | u16 sel; | 567 | u16 sel; |
588 | unsigned cpl, rpl; | 568 | unsigned cpl, rpl; |
589 | 569 | ||
590 | la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; | 570 | la = seg_base(ctxt, addr.seg) + addr.ea; |
591 | switch (ctxt->mode) { | 571 | switch (ctxt->mode) { |
592 | case X86EMUL_MODE_REAL: | 572 | case X86EMUL_MODE_REAL: |
593 | break; | 573 | break; |
@@ -637,7 +617,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
637 | } | 617 | } |
638 | break; | 618 | break; |
639 | } | 619 | } |
640 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : c->ad_bytes != 8) | 620 | if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) |
641 | la &= (u32)-1; | 621 | la &= (u32)-1; |
642 | *linear = la; | 622 | *linear = la; |
643 | return X86EMUL_CONTINUE; | 623 | return X86EMUL_CONTINUE; |
@@ -671,11 +651,10 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | |||
671 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); | 651 | return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); |
672 | } | 652 | } |
673 | 653 | ||
674 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 654 | static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, |
675 | struct x86_emulate_ops *ops, | ||
676 | unsigned long eip, u8 *dest) | 655 | unsigned long eip, u8 *dest) |
677 | { | 656 | { |
678 | struct fetch_cache *fc = &ctxt->decode.fetch; | 657 | struct fetch_cache *fc = &ctxt->fetch; |
679 | int rc; | 658 | int rc; |
680 | int size, cur_size; | 659 | int size, cur_size; |
681 | 660 | ||
@@ -687,8 +666,8 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
687 | rc = __linearize(ctxt, addr, size, false, true, &linear); | 666 | rc = __linearize(ctxt, addr, size, false, true, &linear); |
688 | if (rc != X86EMUL_CONTINUE) | 667 | if (rc != X86EMUL_CONTINUE) |
689 | return rc; | 668 | return rc; |
690 | rc = ops->fetch(ctxt, linear, fc->data + cur_size, | 669 | rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, |
691 | size, &ctxt->exception); | 670 | size, &ctxt->exception); |
692 | if (rc != X86EMUL_CONTINUE) | 671 | if (rc != X86EMUL_CONTINUE) |
693 | return rc; | 672 | return rc; |
694 | fc->end += size; | 673 | fc->end += size; |
@@ -698,7 +677,6 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
698 | } | 677 | } |
699 | 678 | ||
700 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 679 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
701 | struct x86_emulate_ops *ops, | ||
702 | unsigned long eip, void *dest, unsigned size) | 680 | unsigned long eip, void *dest, unsigned size) |
703 | { | 681 | { |
704 | int rc; | 682 | int rc; |
@@ -707,13 +685,30 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
707 | if (eip + size - ctxt->eip > 15) | 685 | if (eip + size - ctxt->eip > 15) |
708 | return X86EMUL_UNHANDLEABLE; | 686 | return X86EMUL_UNHANDLEABLE; |
709 | while (size--) { | 687 | while (size--) { |
710 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 688 | rc = do_insn_fetch_byte(ctxt, eip++, dest++); |
711 | if (rc != X86EMUL_CONTINUE) | 689 | if (rc != X86EMUL_CONTINUE) |
712 | return rc; | 690 | return rc; |
713 | } | 691 | } |
714 | return X86EMUL_CONTINUE; | 692 | return X86EMUL_CONTINUE; |
715 | } | 693 | } |
716 | 694 | ||
695 | /* Fetch next part of the instruction being emulated. */ | ||
696 | #define insn_fetch(_type, _size, _eip) \ | ||
697 | ({ unsigned long _x; \ | ||
698 | rc = do_insn_fetch(ctxt, (_eip), &_x, (_size)); \ | ||
699 | if (rc != X86EMUL_CONTINUE) \ | ||
700 | goto done; \ | ||
701 | (_eip) += (_size); \ | ||
702 | (_type)_x; \ | ||
703 | }) | ||
704 | |||
705 | #define insn_fetch_arr(_arr, _size, _eip) \ | ||
706 | ({ rc = do_insn_fetch(ctxt, (_eip), _arr, (_size)); \ | ||
707 | if (rc != X86EMUL_CONTINUE) \ | ||
708 | goto done; \ | ||
709 | (_eip) += (_size); \ | ||
710 | }) | ||
711 | |||
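The insn_fetch()/insn_fetch_arr() statement-expression macros added above bail out through a local done: label whenever do_insn_fetch() fails, so any function using them must declare rc and provide that label, as the decoders further down do. A self-contained sketch of the same shape (GNU C, with a toy byte stream and a hypothetical decode_two_bytes() caller, not the kernel code):

#include <stdio.h>
#include <string.h>

#define X86EMUL_CONTINUE	0
#define X86EMUL_UNHANDLEABLE	1

static const unsigned char byte_stream[] = { 0x8b, 0x45, 0xfc };

static int do_insn_fetch(unsigned long eip, void *dest, unsigned size)
{
	if (eip + size > sizeof(byte_stream))
		return X86EMUL_UNHANDLEABLE;	/* ran out of bytes */
	memcpy(dest, byte_stream + eip, size);
	return X86EMUL_CONTINUE;
}

/* Same shape as the kernel macro: evaluate to the fetched value,
 * advance eip, or jump to the caller's done: label on failure. */
#define insn_fetch(_type, _size, _eip)				\
({	unsigned long _x = 0;	/* initialised for the sketch */\
	rc = do_insn_fetch((_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)				\
		goto done;					\
	(_eip) += (_size);					\
	(_type)_x;						\
})

static int decode_two_bytes(unsigned long eip)
{
	int rc = X86EMUL_CONTINUE;	/* required by the macro */
	unsigned char opcode, modrm;

	opcode = insn_fetch(unsigned char, 1, eip);
	modrm  = insn_fetch(unsigned char, 1, eip);
	printf("opcode %02x modrm %02x, eip now %lu\n", opcode, modrm, eip);
done:
	return rc;
}

int main(void)
{
	return decode_two_bytes(0) == X86EMUL_CONTINUE ? 0 : 1;
}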
717 | /* | 712 | /* |
718 | * Given the 'reg' portion of a ModRM byte, and a register block, return a | 713 | * Given the 'reg' portion of a ModRM byte, and a register block, return a |
719 | * pointer into the block that addresses the relevant register. | 714 | * pointer into the block that addresses the relevant register. |
@@ -857,16 +852,15 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | |||
857 | 852 | ||
858 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 853 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
859 | struct operand *op, | 854 | struct operand *op, |
860 | struct decode_cache *c, | ||
861 | int inhibit_bytereg) | 855 | int inhibit_bytereg) |
862 | { | 856 | { |
863 | unsigned reg = c->modrm_reg; | 857 | unsigned reg = ctxt->modrm_reg; |
864 | int highbyte_regs = c->rex_prefix == 0; | 858 | int highbyte_regs = ctxt->rex_prefix == 0; |
865 | 859 | ||
866 | if (!(c->d & ModRM)) | 860 | if (!(ctxt->d & ModRM)) |
867 | reg = (c->b & 7) | ((c->rex_prefix & 1) << 3); | 861 | reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3); |
868 | 862 | ||
869 | if (c->d & Sse) { | 863 | if (ctxt->d & Sse) { |
870 | op->type = OP_XMM; | 864 | op->type = OP_XMM; |
871 | op->bytes = 16; | 865 | op->bytes = 16; |
872 | op->addr.xmm = reg; | 866 | op->addr.xmm = reg; |
@@ -875,49 +869,47 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
875 | } | 869 | } |
876 | 870 | ||
877 | op->type = OP_REG; | 871 | op->type = OP_REG; |
878 | if ((c->d & ByteOp) && !inhibit_bytereg) { | 872 | if ((ctxt->d & ByteOp) && !inhibit_bytereg) { |
879 | op->addr.reg = decode_register(reg, c->regs, highbyte_regs); | 873 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); |
880 | op->bytes = 1; | 874 | op->bytes = 1; |
881 | } else { | 875 | } else { |
882 | op->addr.reg = decode_register(reg, c->regs, 0); | 876 | op->addr.reg = decode_register(reg, ctxt->regs, 0); |
883 | op->bytes = c->op_bytes; | 877 | op->bytes = ctxt->op_bytes; |
884 | } | 878 | } |
885 | fetch_register_operand(op); | 879 | fetch_register_operand(op); |
886 | op->orig_val = op->val; | 880 | op->orig_val = op->val; |
887 | } | 881 | } |
888 | 882 | ||
889 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, | 883 | static int decode_modrm(struct x86_emulate_ctxt *ctxt, |
890 | struct x86_emulate_ops *ops, | ||
891 | struct operand *op) | 884 | struct operand *op) |
892 | { | 885 | { |
893 | struct decode_cache *c = &ctxt->decode; | ||
894 | u8 sib; | 886 | u8 sib; |
895 | int index_reg = 0, base_reg = 0, scale; | 887 | int index_reg = 0, base_reg = 0, scale; |
896 | int rc = X86EMUL_CONTINUE; | 888 | int rc = X86EMUL_CONTINUE; |
897 | ulong modrm_ea = 0; | 889 | ulong modrm_ea = 0; |
898 | 890 | ||
899 | if (c->rex_prefix) { | 891 | if (ctxt->rex_prefix) { |
900 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 892 | ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */ |
901 | index_reg = (c->rex_prefix & 2) << 2; /* REX.X */ | 893 | index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */ |
902 | c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */ | 894 | ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ |
903 | } | 895 | } |
904 | 896 | ||
905 | c->modrm = insn_fetch(u8, 1, c->eip); | 897 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); |
906 | c->modrm_mod |= (c->modrm & 0xc0) >> 6; | 898 | ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; |
907 | c->modrm_reg |= (c->modrm & 0x38) >> 3; | 899 | ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; |
908 | c->modrm_rm |= (c->modrm & 0x07); | 900 | ctxt->modrm_rm |= (ctxt->modrm & 0x07); |
909 | c->modrm_seg = VCPU_SREG_DS; | 901 | ctxt->modrm_seg = VCPU_SREG_DS; |
910 | 902 | ||
911 | if (c->modrm_mod == 3) { | 903 | if (ctxt->modrm_mod == 3) { |
912 | op->type = OP_REG; | 904 | op->type = OP_REG; |
913 | op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 905 | op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
914 | op->addr.reg = decode_register(c->modrm_rm, | 906 | op->addr.reg = decode_register(ctxt->modrm_rm, |
915 | c->regs, c->d & ByteOp); | 907 | ctxt->regs, ctxt->d & ByteOp); |
916 | if (c->d & Sse) { | 908 | if (ctxt->d & Sse) { |
917 | op->type = OP_XMM; | 909 | op->type = OP_XMM; |
918 | op->bytes = 16; | 910 | op->bytes = 16; |
919 | op->addr.xmm = c->modrm_rm; | 911 | op->addr.xmm = ctxt->modrm_rm; |
920 | read_sse_reg(ctxt, &op->vec_val, c->modrm_rm); | 912 | read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); |
921 | return rc; | 913 | return rc; |
922 | } | 914 | } |
923 | fetch_register_operand(op); | 915 | fetch_register_operand(op); |
@@ -926,26 +918,26 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
926 | 918 | ||
927 | op->type = OP_MEM; | 919 | op->type = OP_MEM; |
928 | 920 | ||
929 | if (c->ad_bytes == 2) { | 921 | if (ctxt->ad_bytes == 2) { |
930 | unsigned bx = c->regs[VCPU_REGS_RBX]; | 922 | unsigned bx = ctxt->regs[VCPU_REGS_RBX]; |
931 | unsigned bp = c->regs[VCPU_REGS_RBP]; | 923 | unsigned bp = ctxt->regs[VCPU_REGS_RBP]; |
932 | unsigned si = c->regs[VCPU_REGS_RSI]; | 924 | unsigned si = ctxt->regs[VCPU_REGS_RSI]; |
933 | unsigned di = c->regs[VCPU_REGS_RDI]; | 925 | unsigned di = ctxt->regs[VCPU_REGS_RDI]; |
934 | 926 | ||
935 | /* 16-bit ModR/M decode. */ | 927 | /* 16-bit ModR/M decode. */ |
936 | switch (c->modrm_mod) { | 928 | switch (ctxt->modrm_mod) { |
937 | case 0: | 929 | case 0: |
938 | if (c->modrm_rm == 6) | 930 | if (ctxt->modrm_rm == 6) |
939 | modrm_ea += insn_fetch(u16, 2, c->eip); | 931 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); |
940 | break; | 932 | break; |
941 | case 1: | 933 | case 1: |
942 | modrm_ea += insn_fetch(s8, 1, c->eip); | 934 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); |
943 | break; | 935 | break; |
944 | case 2: | 936 | case 2: |
945 | modrm_ea += insn_fetch(u16, 2, c->eip); | 937 | modrm_ea += insn_fetch(u16, 2, ctxt->_eip); |
946 | break; | 938 | break; |
947 | } | 939 | } |
948 | switch (c->modrm_rm) { | 940 | switch (ctxt->modrm_rm) { |
949 | case 0: | 941 | case 0: |
950 | modrm_ea += bx + si; | 942 | modrm_ea += bx + si; |
951 | break; | 943 | break; |
@@ -965,46 +957,46 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
965 | modrm_ea += di; | 957 | modrm_ea += di; |
966 | break; | 958 | break; |
967 | case 6: | 959 | case 6: |
968 | if (c->modrm_mod != 0) | 960 | if (ctxt->modrm_mod != 0) |
969 | modrm_ea += bp; | 961 | modrm_ea += bp; |
970 | break; | 962 | break; |
971 | case 7: | 963 | case 7: |
972 | modrm_ea += bx; | 964 | modrm_ea += bx; |
973 | break; | 965 | break; |
974 | } | 966 | } |
975 | if (c->modrm_rm == 2 || c->modrm_rm == 3 || | 967 | if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 || |
976 | (c->modrm_rm == 6 && c->modrm_mod != 0)) | 968 | (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0)) |
977 | c->modrm_seg = VCPU_SREG_SS; | 969 | ctxt->modrm_seg = VCPU_SREG_SS; |
978 | modrm_ea = (u16)modrm_ea; | 970 | modrm_ea = (u16)modrm_ea; |
979 | } else { | 971 | } else { |
980 | /* 32/64-bit ModR/M decode. */ | 972 | /* 32/64-bit ModR/M decode. */ |
981 | if ((c->modrm_rm & 7) == 4) { | 973 | if ((ctxt->modrm_rm & 7) == 4) { |
982 | sib = insn_fetch(u8, 1, c->eip); | 974 | sib = insn_fetch(u8, 1, ctxt->_eip); |
983 | index_reg |= (sib >> 3) & 7; | 975 | index_reg |= (sib >> 3) & 7; |
984 | base_reg |= sib & 7; | 976 | base_reg |= sib & 7; |
985 | scale = sib >> 6; | 977 | scale = sib >> 6; |
986 | 978 | ||
987 | if ((base_reg & 7) == 5 && c->modrm_mod == 0) | 979 | if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) |
988 | modrm_ea += insn_fetch(s32, 4, c->eip); | 980 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); |
989 | else | 981 | else |
990 | modrm_ea += c->regs[base_reg]; | 982 | modrm_ea += ctxt->regs[base_reg]; |
991 | if (index_reg != 4) | 983 | if (index_reg != 4) |
992 | modrm_ea += c->regs[index_reg] << scale; | 984 | modrm_ea += ctxt->regs[index_reg] << scale; |
993 | } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) { | 985 | } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { |
994 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 986 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
995 | c->rip_relative = 1; | 987 | ctxt->rip_relative = 1; |
996 | } else | 988 | } else |
997 | modrm_ea += c->regs[c->modrm_rm]; | 989 | modrm_ea += ctxt->regs[ctxt->modrm_rm]; |
998 | switch (c->modrm_mod) { | 990 | switch (ctxt->modrm_mod) { |
999 | case 0: | 991 | case 0: |
1000 | if (c->modrm_rm == 5) | 992 | if (ctxt->modrm_rm == 5) |
1001 | modrm_ea += insn_fetch(s32, 4, c->eip); | 993 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); |
1002 | break; | 994 | break; |
1003 | case 1: | 995 | case 1: |
1004 | modrm_ea += insn_fetch(s8, 1, c->eip); | 996 | modrm_ea += insn_fetch(s8, 1, ctxt->_eip); |
1005 | break; | 997 | break; |
1006 | case 2: | 998 | case 2: |
1007 | modrm_ea += insn_fetch(s32, 4, c->eip); | 999 | modrm_ea += insn_fetch(s32, 4, ctxt->_eip); |
1008 | break; | 1000 | break; |
1009 | } | 1001 | } |
1010 | } | 1002 | } |
@@ -1014,53 +1006,50 @@ done: | |||
1014 | } | 1006 | } |
1015 | 1007 | ||
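decode_modrm() above splits the ModRM byte into mod/reg/rm and widens reg and rm with the REX.R and REX.B prefix bits (REX.X widens the SIB index, handled in the 32/64-bit branch). The same bit arithmetic shown standalone, with made-up example bytes:

#include <stdio.h>

int main(void)
{
	unsigned char rex   = 0x44;	/* example REX prefix: only REX.R set   */
	unsigned char modrm = 0x5d;	/* example ModRM: mod=01 reg=011 rm=101 */

	unsigned mod = (modrm & 0xc0) >> 6;
	unsigned reg = ((rex & 4) << 1) | ((modrm & 0x38) >> 3);	/* REX.R -> bit 3 of reg */
	unsigned rm  = ((rex & 1) << 3) | (modrm & 0x07);		/* REX.B -> bit 3 of rm  */

	printf("mod=%u reg=%u rm=%u\n", mod, reg, rm);
	return 0;
}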
1016 | static int decode_abs(struct x86_emulate_ctxt *ctxt, | 1008 | static int decode_abs(struct x86_emulate_ctxt *ctxt, |
1017 | struct x86_emulate_ops *ops, | ||
1018 | struct operand *op) | 1009 | struct operand *op) |
1019 | { | 1010 | { |
1020 | struct decode_cache *c = &ctxt->decode; | ||
1021 | int rc = X86EMUL_CONTINUE; | 1011 | int rc = X86EMUL_CONTINUE; |
1022 | 1012 | ||
1023 | op->type = OP_MEM; | 1013 | op->type = OP_MEM; |
1024 | switch (c->ad_bytes) { | 1014 | switch (ctxt->ad_bytes) { |
1025 | case 2: | 1015 | case 2: |
1026 | op->addr.mem.ea = insn_fetch(u16, 2, c->eip); | 1016 | op->addr.mem.ea = insn_fetch(u16, 2, ctxt->_eip); |
1027 | break; | 1017 | break; |
1028 | case 4: | 1018 | case 4: |
1029 | op->addr.mem.ea = insn_fetch(u32, 4, c->eip); | 1019 | op->addr.mem.ea = insn_fetch(u32, 4, ctxt->_eip); |
1030 | break; | 1020 | break; |
1031 | case 8: | 1021 | case 8: |
1032 | op->addr.mem.ea = insn_fetch(u64, 8, c->eip); | 1022 | op->addr.mem.ea = insn_fetch(u64, 8, ctxt->_eip); |
1033 | break; | 1023 | break; |
1034 | } | 1024 | } |
1035 | done: | 1025 | done: |
1036 | return rc; | 1026 | return rc; |
1037 | } | 1027 | } |
1038 | 1028 | ||
1039 | static void fetch_bit_operand(struct decode_cache *c) | 1029 | static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) |
1040 | { | 1030 | { |
1041 | long sv = 0, mask; | 1031 | long sv = 0, mask; |
1042 | 1032 | ||
1043 | if (c->dst.type == OP_MEM && c->src.type == OP_REG) { | 1033 | if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) { |
1044 | mask = ~(c->dst.bytes * 8 - 1); | 1034 | mask = ~(ctxt->dst.bytes * 8 - 1); |
1045 | 1035 | ||
1046 | if (c->src.bytes == 2) | 1036 | if (ctxt->src.bytes == 2) |
1047 | sv = (s16)c->src.val & (s16)mask; | 1037 | sv = (s16)ctxt->src.val & (s16)mask; |
1048 | else if (c->src.bytes == 4) | 1038 | else if (ctxt->src.bytes == 4) |
1049 | sv = (s32)c->src.val & (s32)mask; | 1039 | sv = (s32)ctxt->src.val & (s32)mask; |
1050 | 1040 | ||
1051 | c->dst.addr.mem.ea += (sv >> 3); | 1041 | ctxt->dst.addr.mem.ea += (sv >> 3); |
1052 | } | 1042 | } |
1053 | 1043 | ||
1054 | /* only subword offset */ | 1044 | /* only subword offset */ |
1055 | c->src.val &= (c->dst.bytes << 3) - 1; | 1045 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; |
1056 | } | 1046 | } |
1057 | 1047 | ||
1058 | static int read_emulated(struct x86_emulate_ctxt *ctxt, | 1048 | static int read_emulated(struct x86_emulate_ctxt *ctxt, |
1059 | struct x86_emulate_ops *ops, | ||
1060 | unsigned long addr, void *dest, unsigned size) | 1049 | unsigned long addr, void *dest, unsigned size) |
1061 | { | 1050 | { |
1062 | int rc; | 1051 | int rc; |
1063 | struct read_cache *mc = &ctxt->decode.mem_read; | 1052 | struct read_cache *mc = &ctxt->mem_read; |
1064 | 1053 | ||
1065 | while (size) { | 1054 | while (size) { |
1066 | int n = min(size, 8u); | 1055 | int n = min(size, 8u); |
@@ -1068,8 +1057,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, | |||
1068 | if (mc->pos < mc->end) | 1057 | if (mc->pos < mc->end) |
1069 | goto read_cached; | 1058 | goto read_cached; |
1070 | 1059 | ||
1071 | rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, | 1060 | rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n, |
1072 | &ctxt->exception); | 1061 | &ctxt->exception); |
1073 | if (rc != X86EMUL_CONTINUE) | 1062 | if (rc != X86EMUL_CONTINUE) |
1074 | return rc; | 1063 | return rc; |
1075 | mc->end += n; | 1064 | mc->end += n; |
@@ -1094,7 +1083,7 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, | |||
1094 | rc = linearize(ctxt, addr, size, false, &linear); | 1083 | rc = linearize(ctxt, addr, size, false, &linear); |
1095 | if (rc != X86EMUL_CONTINUE) | 1084 | if (rc != X86EMUL_CONTINUE) |
1096 | return rc; | 1085 | return rc; |
1097 | return read_emulated(ctxt, ctxt->ops, linear, data, size); | 1086 | return read_emulated(ctxt, linear, data, size); |
1098 | } | 1087 | } |
1099 | 1088 | ||
1100 | static int segmented_write(struct x86_emulate_ctxt *ctxt, | 1089 | static int segmented_write(struct x86_emulate_ctxt *ctxt, |
@@ -1128,26 +1117,24 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, | |||
1128 | } | 1117 | } |
1129 | 1118 | ||
1130 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1119 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
1131 | struct x86_emulate_ops *ops, | ||
1132 | unsigned int size, unsigned short port, | 1120 | unsigned int size, unsigned short port, |
1133 | void *dest) | 1121 | void *dest) |
1134 | { | 1122 | { |
1135 | struct read_cache *rc = &ctxt->decode.io_read; | 1123 | struct read_cache *rc = &ctxt->io_read; |
1136 | 1124 | ||
1137 | if (rc->pos == rc->end) { /* refill pio read ahead */ | 1125 | if (rc->pos == rc->end) { /* refill pio read ahead */ |
1138 | struct decode_cache *c = &ctxt->decode; | ||
1139 | unsigned int in_page, n; | 1126 | unsigned int in_page, n; |
1140 | unsigned int count = c->rep_prefix ? | 1127 | unsigned int count = ctxt->rep_prefix ? |
1141 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | 1128 | address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1; |
1142 | in_page = (ctxt->eflags & EFLG_DF) ? | 1129 | in_page = (ctxt->eflags & EFLG_DF) ? |
1143 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | 1130 | offset_in_page(ctxt->regs[VCPU_REGS_RDI]) : |
1144 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | 1131 | PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]); |
1145 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | 1132 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, |
1146 | count); | 1133 | count); |
1147 | if (n == 0) | 1134 | if (n == 0) |
1148 | n = 1; | 1135 | n = 1; |
1149 | rc->pos = rc->end = 0; | 1136 | rc->pos = rc->end = 0; |
1150 | if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) | 1137 | if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n)) |
1151 | return 0; | 1138 | return 0; |
1152 | rc->end = n * size; | 1139 | rc->end = n * size; |
1153 | } | 1140 | } |
@@ -1158,9 +1145,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1158 | } | 1145 | } |
1159 | 1146 | ||
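pio_in_emulated() above batches string I/O: when its read cache is drained it fetches up to min(rep count, bounce-buffer capacity, elements left before the page boundary) items in one callback and then hands them out one element at a time. A toy of just that sizing calculation (forward direction only; the buffer size and sample values are invented):

#include <stdio.h>

#define PAGE_SIZE 4096u
#define BUF_SIZE  1024u		/* stands in for sizeof(rc->data) */

static unsigned toy_batch(unsigned size, unsigned long rdi,
			  unsigned long rep_count)
{
	unsigned in_page = PAGE_SIZE - (unsigned)(rdi & (PAGE_SIZE - 1));
	unsigned n = in_page / size;

	if (n > BUF_SIZE / size)
		n = BUF_SIZE / size;
	if (n > rep_count)
		n = (unsigned)rep_count;
	return n ? n : 1;	/* always make forward progress */
}

int main(void)
{
	/* 4-byte ins near the end of a page, large rep count */
	printf("batch = %u elements\n", toy_batch(4, 0xff0, 1000));
	return 0;
}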
1160 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1147 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
1161 | struct x86_emulate_ops *ops, | ||
1162 | u16 selector, struct desc_ptr *dt) | 1148 | u16 selector, struct desc_ptr *dt) |
1163 | { | 1149 | { |
1150 | struct x86_emulate_ops *ops = ctxt->ops; | ||
1151 | |||
1164 | if (selector & 1 << 2) { | 1152 | if (selector & 1 << 2) { |
1165 | struct desc_struct desc; | 1153 | struct desc_struct desc; |
1166 | u16 sel; | 1154 | u16 sel; |
@@ -1177,48 +1165,42 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
1177 | 1165 | ||
1178 | /* allowed just for 8 bytes segments */ | 1166 | /* allowed just for 8 bytes segments */ |
1179 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1167 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1180 | struct x86_emulate_ops *ops, | ||
1181 | u16 selector, struct desc_struct *desc) | 1168 | u16 selector, struct desc_struct *desc) |
1182 | { | 1169 | { |
1183 | struct desc_ptr dt; | 1170 | struct desc_ptr dt; |
1184 | u16 index = selector >> 3; | 1171 | u16 index = selector >> 3; |
1185 | int ret; | ||
1186 | ulong addr; | 1172 | ulong addr; |
1187 | 1173 | ||
1188 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1174 | get_descriptor_table_ptr(ctxt, selector, &dt); |
1189 | 1175 | ||
1190 | if (dt.size < index * 8 + 7) | 1176 | if (dt.size < index * 8 + 7) |
1191 | return emulate_gp(ctxt, selector & 0xfffc); | 1177 | return emulate_gp(ctxt, selector & 0xfffc); |
1192 | addr = dt.address + index * 8; | ||
1193 | ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); | ||
1194 | 1178 | ||
1195 | return ret; | 1179 | addr = dt.address + index * 8; |
1180 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | ||
1181 | &ctxt->exception); | ||
1196 | } | 1182 | } |
1197 | 1183 | ||
1198 | /* allowed just for 8 bytes segments */ | 1184 | /* allowed just for 8 bytes segments */ |
1199 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1185 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1200 | struct x86_emulate_ops *ops, | ||
1201 | u16 selector, struct desc_struct *desc) | 1186 | u16 selector, struct desc_struct *desc) |
1202 | { | 1187 | { |
1203 | struct desc_ptr dt; | 1188 | struct desc_ptr dt; |
1204 | u16 index = selector >> 3; | 1189 | u16 index = selector >> 3; |
1205 | ulong addr; | 1190 | ulong addr; |
1206 | int ret; | ||
1207 | 1191 | ||
1208 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1192 | get_descriptor_table_ptr(ctxt, selector, &dt); |
1209 | 1193 | ||
1210 | if (dt.size < index * 8 + 7) | 1194 | if (dt.size < index * 8 + 7) |
1211 | return emulate_gp(ctxt, selector & 0xfffc); | 1195 | return emulate_gp(ctxt, selector & 0xfffc); |
1212 | 1196 | ||
1213 | addr = dt.address + index * 8; | 1197 | addr = dt.address + index * 8; |
1214 | ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); | 1198 | return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, |
1215 | 1199 | &ctxt->exception); | |
1216 | return ret; | ||
1217 | } | 1200 | } |
1218 | 1201 | ||
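read_segment_descriptor() and write_segment_descriptor() both reduce to the same selector arithmetic: the index lives in bits 15:3, the TI bit (bit 2) selects the LDT in get_descriptor_table_ptr(), the low two bits are the RPL, and the 8-byte descriptor at that index must fit inside the table limit or the emulator raises #GP. A standalone sketch of that arithmetic with simplified types:

#include <stdint.h>
#include <stdio.h>

struct toy_desc_ptr {
	uint64_t address;	/* table base  */
	uint16_t size;		/* table limit */
};

static int toy_descriptor_address(const struct toy_desc_ptr *dt,
				  uint16_t selector, uint64_t *addr)
{
	unsigned index = selector >> 3;		/* bits 15..3 */
	int uses_ldt   = selector & (1 << 2);	/* TI bit     */
	unsigned rpl   = selector & 3;		/* bits 1..0  */

	(void)uses_ldt;
	(void)rpl;

	if (dt->size < index * 8 + 7)		/* 8-byte descriptor must fit */
		return -1;			/* emulator would inject #GP  */

	*addr = dt->address + index * 8;
	return 0;
}

int main(void)
{
	struct toy_desc_ptr gdt = { .address = 0x1000, .size = 0x7f };
	uint64_t addr;

	if (toy_descriptor_address(&gdt, 0x08, &addr) == 0)
		printf("descriptor for selector 0x08 at %#llx\n",
		       (unsigned long long)addr);
	return 0;
}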
1219 | /* Does not support long mode */ | 1202 | /* Does not support long mode */ |
1220 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 1203 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
1221 | struct x86_emulate_ops *ops, | ||
1222 | u16 selector, int seg) | 1204 | u16 selector, int seg) |
1223 | { | 1205 | { |
1224 | struct desc_struct seg_desc; | 1206 | struct desc_struct seg_desc; |
@@ -1253,7 +1235,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1253 | if (null_selector) /* for NULL selector skip all following checks */ | 1235 | if (null_selector) /* for NULL selector skip all following checks */ |
1254 | goto load; | 1236 | goto load; |
1255 | 1237 | ||
1256 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | 1238 | ret = read_segment_descriptor(ctxt, selector, &seg_desc); |
1257 | if (ret != X86EMUL_CONTINUE) | 1239 | if (ret != X86EMUL_CONTINUE) |
1258 | return ret; | 1240 | return ret; |
1259 | 1241 | ||
@@ -1271,7 +1253,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1271 | 1253 | ||
1272 | rpl = selector & 3; | 1254 | rpl = selector & 3; |
1273 | dpl = seg_desc.dpl; | 1255 | dpl = seg_desc.dpl; |
1274 | cpl = ops->cpl(ctxt); | 1256 | cpl = ctxt->ops->cpl(ctxt); |
1275 | 1257 | ||
1276 | switch (seg) { | 1258 | switch (seg) { |
1277 | case VCPU_SREG_SS: | 1259 | case VCPU_SREG_SS: |
@@ -1322,12 +1304,12 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1322 | if (seg_desc.s) { | 1304 | if (seg_desc.s) { |
1323 | /* mark segment as accessed */ | 1305 | /* mark segment as accessed */ |
1324 | seg_desc.type |= 1; | 1306 | seg_desc.type |= 1; |
1325 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | 1307 | ret = write_segment_descriptor(ctxt, selector, &seg_desc); |
1326 | if (ret != X86EMUL_CONTINUE) | 1308 | if (ret != X86EMUL_CONTINUE) |
1327 | return ret; | 1309 | return ret; |
1328 | } | 1310 | } |
1329 | load: | 1311 | load: |
1330 | ops->set_segment(ctxt, selector, &seg_desc, 0, seg); | 1312 | ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); |
1331 | return X86EMUL_CONTINUE; | 1313 | return X86EMUL_CONTINUE; |
1332 | exception: | 1314 | exception: |
1333 | emulate_exception(ctxt, err_vec, err_code, true); | 1315 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1356,29 +1338,28 @@ static void write_register_operand(struct operand *op) | |||
1356 | static int writeback(struct x86_emulate_ctxt *ctxt) | 1338 | static int writeback(struct x86_emulate_ctxt *ctxt) |
1357 | { | 1339 | { |
1358 | int rc; | 1340 | int rc; |
1359 | struct decode_cache *c = &ctxt->decode; | ||
1360 | 1341 | ||
1361 | switch (c->dst.type) { | 1342 | switch (ctxt->dst.type) { |
1362 | case OP_REG: | 1343 | case OP_REG: |
1363 | write_register_operand(&c->dst); | 1344 | write_register_operand(&ctxt->dst); |
1364 | break; | 1345 | break; |
1365 | case OP_MEM: | 1346 | case OP_MEM: |
1366 | if (c->lock_prefix) | 1347 | if (ctxt->lock_prefix) |
1367 | rc = segmented_cmpxchg(ctxt, | 1348 | rc = segmented_cmpxchg(ctxt, |
1368 | c->dst.addr.mem, | 1349 | ctxt->dst.addr.mem, |
1369 | &c->dst.orig_val, | 1350 | &ctxt->dst.orig_val, |
1370 | &c->dst.val, | 1351 | &ctxt->dst.val, |
1371 | c->dst.bytes); | 1352 | ctxt->dst.bytes); |
1372 | else | 1353 | else |
1373 | rc = segmented_write(ctxt, | 1354 | rc = segmented_write(ctxt, |
1374 | c->dst.addr.mem, | 1355 | ctxt->dst.addr.mem, |
1375 | &c->dst.val, | 1356 | &ctxt->dst.val, |
1376 | c->dst.bytes); | 1357 | ctxt->dst.bytes); |
1377 | if (rc != X86EMUL_CONTINUE) | 1358 | if (rc != X86EMUL_CONTINUE) |
1378 | return rc; | 1359 | return rc; |
1379 | break; | 1360 | break; |
1380 | case OP_XMM: | 1361 | case OP_XMM: |
1381 | write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm); | 1362 | write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); |
1382 | break; | 1363 | break; |
1383 | case OP_NONE: | 1364 | case OP_NONE: |
1384 | /* no writeback */ | 1365 | /* no writeback */ |
@@ -1391,50 +1372,45 @@ static int writeback(struct x86_emulate_ctxt *ctxt) | |||
1391 | 1372 | ||
1392 | static int em_push(struct x86_emulate_ctxt *ctxt) | 1373 | static int em_push(struct x86_emulate_ctxt *ctxt) |
1393 | { | 1374 | { |
1394 | struct decode_cache *c = &ctxt->decode; | ||
1395 | struct segmented_address addr; | 1375 | struct segmented_address addr; |
1396 | 1376 | ||
1397 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1377 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -ctxt->op_bytes); |
1398 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1378 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); |
1399 | addr.seg = VCPU_SREG_SS; | 1379 | addr.seg = VCPU_SREG_SS; |
1400 | 1380 | ||
1401 | /* Disable writeback. */ | 1381 | /* Disable writeback. */ |
1402 | c->dst.type = OP_NONE; | 1382 | ctxt->dst.type = OP_NONE; |
1403 | return segmented_write(ctxt, addr, &c->src.val, c->op_bytes); | 1383 | return segmented_write(ctxt, addr, &ctxt->src.val, ctxt->op_bytes); |
1404 | } | 1384 | } |
1405 | 1385 | ||
1406 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, | 1386 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
1407 | void *dest, int len) | 1387 | void *dest, int len) |
1408 | { | 1388 | { |
1409 | struct decode_cache *c = &ctxt->decode; | ||
1410 | int rc; | 1389 | int rc; |
1411 | struct segmented_address addr; | 1390 | struct segmented_address addr; |
1412 | 1391 | ||
1413 | addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]); | 1392 | addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); |
1414 | addr.seg = VCPU_SREG_SS; | 1393 | addr.seg = VCPU_SREG_SS; |
1415 | rc = segmented_read(ctxt, addr, dest, len); | 1394 | rc = segmented_read(ctxt, addr, dest, len); |
1416 | if (rc != X86EMUL_CONTINUE) | 1395 | if (rc != X86EMUL_CONTINUE) |
1417 | return rc; | 1396 | return rc; |
1418 | 1397 | ||
1419 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); | 1398 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len); |
1420 | return rc; | 1399 | return rc; |
1421 | } | 1400 | } |
1422 | 1401 | ||
1423 | static int em_pop(struct x86_emulate_ctxt *ctxt) | 1402 | static int em_pop(struct x86_emulate_ctxt *ctxt) |
1424 | { | 1403 | { |
1425 | struct decode_cache *c = &ctxt->decode; | 1404 | return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
1426 | |||
1427 | return emulate_pop(ctxt, &c->dst.val, c->op_bytes); | ||
1428 | } | 1405 | } |
1429 | 1406 | ||
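em_push() and emulate_pop() above encode the usual x86 stack discipline: push pre-decrements RSP by the operand size and then writes at SS:RSP, pop reads first and then bumps RSP. A toy model of that ordering on a flat array (the real code also masks the address by the current address size and goes through segmented_read()/segmented_write(), which this sketch skips):

#include <stdio.h>
#include <string.h>

static unsigned char stack_mem[64];	/* stands in for guest stack memory */

static void toy_push(unsigned long *rsp, const void *val, unsigned op_bytes)
{
	*rsp -= op_bytes;			 /* decrement RSP first            */
	memcpy(stack_mem + *rsp, val, op_bytes); /* then store at SS:RSP           */
}

static void toy_pop(unsigned long *rsp, void *dest, unsigned op_bytes)
{
	memcpy(dest, stack_mem + *rsp, op_bytes);/* load from SS:RSP first         */
	*rsp += op_bytes;			 /* then release the slot          */
}

int main(void)
{
	unsigned long rsp = sizeof(stack_mem);
	unsigned long a = 0x1234, b = 0;

	toy_push(&rsp, &a, sizeof(a));
	toy_pop(&rsp, &b, sizeof(b));
	printf("popped %#lx, rsp back to %lu\n", b, rsp);
	return 0;
}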
1430 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | 1407 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, |
1431 | struct x86_emulate_ops *ops, | 1408 | void *dest, int len) |
1432 | void *dest, int len) | ||
1433 | { | 1409 | { |
1434 | int rc; | 1410 | int rc; |
1435 | unsigned long val, change_mask; | 1411 | unsigned long val, change_mask; |
1436 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1412 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1437 | int cpl = ops->cpl(ctxt); | 1413 | int cpl = ctxt->ops->cpl(ctxt); |
1438 | 1414 | ||
1439 | rc = emulate_pop(ctxt, &val, len); | 1415 | rc = emulate_pop(ctxt, &val, len); |
1440 | if (rc != X86EMUL_CONTINUE) | 1416 | if (rc != X86EMUL_CONTINUE) |
@@ -1470,49 +1446,41 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1470 | 1446 | ||
1471 | static int em_popf(struct x86_emulate_ctxt *ctxt) | 1447 | static int em_popf(struct x86_emulate_ctxt *ctxt) |
1472 | { | 1448 | { |
1473 | struct decode_cache *c = &ctxt->decode; | 1449 | ctxt->dst.type = OP_REG; |
1474 | 1450 | ctxt->dst.addr.reg = &ctxt->eflags; | |
1475 | c->dst.type = OP_REG; | 1451 | ctxt->dst.bytes = ctxt->op_bytes; |
1476 | c->dst.addr.reg = &ctxt->eflags; | 1452 | return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
1477 | c->dst.bytes = c->op_bytes; | ||
1478 | return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); | ||
1479 | } | 1453 | } |
1480 | 1454 | ||
1481 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, | 1455 | static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) |
1482 | struct x86_emulate_ops *ops, int seg) | ||
1483 | { | 1456 | { |
1484 | struct decode_cache *c = &ctxt->decode; | 1457 | ctxt->src.val = get_segment_selector(ctxt, seg); |
1485 | |||
1486 | c->src.val = get_segment_selector(ctxt, seg); | ||
1487 | 1458 | ||
1488 | return em_push(ctxt); | 1459 | return em_push(ctxt); |
1489 | } | 1460 | } |
1490 | 1461 | ||
1491 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1462 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int seg) |
1492 | struct x86_emulate_ops *ops, int seg) | ||
1493 | { | 1463 | { |
1494 | struct decode_cache *c = &ctxt->decode; | ||
1495 | unsigned long selector; | 1464 | unsigned long selector; |
1496 | int rc; | 1465 | int rc; |
1497 | 1466 | ||
1498 | rc = emulate_pop(ctxt, &selector, c->op_bytes); | 1467 | rc = emulate_pop(ctxt, &selector, ctxt->op_bytes); |
1499 | if (rc != X86EMUL_CONTINUE) | 1468 | if (rc != X86EMUL_CONTINUE) |
1500 | return rc; | 1469 | return rc; |
1501 | 1470 | ||
1502 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); | 1471 | rc = load_segment_descriptor(ctxt, (u16)selector, seg); |
1503 | return rc; | 1472 | return rc; |
1504 | } | 1473 | } |
1505 | 1474 | ||
1506 | static int em_pusha(struct x86_emulate_ctxt *ctxt) | 1475 | static int em_pusha(struct x86_emulate_ctxt *ctxt) |
1507 | { | 1476 | { |
1508 | struct decode_cache *c = &ctxt->decode; | 1477 | unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP]; |
1509 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | ||
1510 | int rc = X86EMUL_CONTINUE; | 1478 | int rc = X86EMUL_CONTINUE; |
1511 | int reg = VCPU_REGS_RAX; | 1479 | int reg = VCPU_REGS_RAX; |
1512 | 1480 | ||
1513 | while (reg <= VCPU_REGS_RDI) { | 1481 | while (reg <= VCPU_REGS_RDI) { |
1514 | (reg == VCPU_REGS_RSP) ? | 1482 | (reg == VCPU_REGS_RSP) ? |
1515 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1483 | (ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]); |
1516 | 1484 | ||
1517 | rc = em_push(ctxt); | 1485 | rc = em_push(ctxt); |
1518 | if (rc != X86EMUL_CONTINUE) | 1486 | if (rc != X86EMUL_CONTINUE) |
@@ -1526,26 +1494,23 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) | |||
1526 | 1494 | ||
1527 | static int em_pushf(struct x86_emulate_ctxt *ctxt) | 1495 | static int em_pushf(struct x86_emulate_ctxt *ctxt) |
1528 | { | 1496 | { |
1529 | struct decode_cache *c = &ctxt->decode; | 1497 | ctxt->src.val = (unsigned long)ctxt->eflags; |
1530 | |||
1531 | c->src.val = (unsigned long)ctxt->eflags; | ||
1532 | return em_push(ctxt); | 1498 | return em_push(ctxt); |
1533 | } | 1499 | } |
1534 | 1500 | ||
1535 | static int em_popa(struct x86_emulate_ctxt *ctxt) | 1501 | static int em_popa(struct x86_emulate_ctxt *ctxt) |
1536 | { | 1502 | { |
1537 | struct decode_cache *c = &ctxt->decode; | ||
1538 | int rc = X86EMUL_CONTINUE; | 1503 | int rc = X86EMUL_CONTINUE; |
1539 | int reg = VCPU_REGS_RDI; | 1504 | int reg = VCPU_REGS_RDI; |
1540 | 1505 | ||
1541 | while (reg >= VCPU_REGS_RAX) { | 1506 | while (reg >= VCPU_REGS_RAX) { |
1542 | if (reg == VCPU_REGS_RSP) { | 1507 | if (reg == VCPU_REGS_RSP) { |
1543 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | 1508 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], |
1544 | c->op_bytes); | 1509 | ctxt->op_bytes); |
1545 | --reg; | 1510 | --reg; |
1546 | } | 1511 | } |
1547 | 1512 | ||
1548 | rc = emulate_pop(ctxt, &c->regs[reg], c->op_bytes); | 1513 | rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes); |
1549 | if (rc != X86EMUL_CONTINUE) | 1514 | if (rc != X86EMUL_CONTINUE) |
1550 | break; | 1515 | break; |
1551 | --reg; | 1516 | --reg; |
@@ -1553,10 +1518,9 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) | |||
1553 | return rc; | 1518 | return rc; |
1554 | } | 1519 | } |
1555 | 1520 | ||
1556 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, | 1521 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) |
1557 | struct x86_emulate_ops *ops, int irq) | ||
1558 | { | 1522 | { |
1559 | struct decode_cache *c = &ctxt->decode; | 1523 | struct x86_emulate_ops *ops = ctxt->ops; |
1560 | int rc; | 1524 | int rc; |
1561 | struct desc_ptr dt; | 1525 | struct desc_ptr dt; |
1562 | gva_t cs_addr; | 1526 | gva_t cs_addr; |
@@ -1564,19 +1528,19 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1564 | u16 cs, eip; | 1528 | u16 cs, eip; |
1565 | 1529 | ||
1566 | /* TODO: Add limit checks */ | 1530 | /* TODO: Add limit checks */ |
1567 | c->src.val = ctxt->eflags; | 1531 | ctxt->src.val = ctxt->eflags; |
1568 | rc = em_push(ctxt); | 1532 | rc = em_push(ctxt); |
1569 | if (rc != X86EMUL_CONTINUE) | 1533 | if (rc != X86EMUL_CONTINUE) |
1570 | return rc; | 1534 | return rc; |
1571 | 1535 | ||
1572 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); | 1536 | ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); |
1573 | 1537 | ||
1574 | c->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); | 1538 | ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); |
1575 | rc = em_push(ctxt); | 1539 | rc = em_push(ctxt); |
1576 | if (rc != X86EMUL_CONTINUE) | 1540 | if (rc != X86EMUL_CONTINUE) |
1577 | return rc; | 1541 | return rc; |
1578 | 1542 | ||
1579 | c->src.val = c->eip; | 1543 | ctxt->src.val = ctxt->_eip; |
1580 | rc = em_push(ctxt); | 1544 | rc = em_push(ctxt); |
1581 | if (rc != X86EMUL_CONTINUE) | 1545 | if (rc != X86EMUL_CONTINUE) |
1582 | return rc; | 1546 | return rc; |
@@ -1594,21 +1558,20 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, | |||
1594 | if (rc != X86EMUL_CONTINUE) | 1558 | if (rc != X86EMUL_CONTINUE) |
1595 | return rc; | 1559 | return rc; |
1596 | 1560 | ||
1597 | rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS); | 1561 | rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS); |
1598 | if (rc != X86EMUL_CONTINUE) | 1562 | if (rc != X86EMUL_CONTINUE) |
1599 | return rc; | 1563 | return rc; |
1600 | 1564 | ||
1601 | c->eip = eip; | 1565 | ctxt->_eip = eip; |
1602 | 1566 | ||
1603 | return rc; | 1567 | return rc; |
1604 | } | 1568 | } |
1605 | 1569 | ||
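emulate_int_real() pushes FLAGS, CS and IP, clears IF/TF/AC, and then loads the new CS:IP from the real-mode IVT; the IVT fetch itself sits in context not shown in this hunk, so the toy below simply follows the architectural layout (4 bytes per vector: offset word, then segment word), with a fictional flat array in place of guest memory:

#include <stdint.h>
#include <stdio.h>

static uint8_t guest_ram[1024];

static void toy_ivt_lookup(uint32_t ivt_base, int irq,
			   uint16_t *cs, uint16_t *ip)
{
	uint32_t entry = ivt_base + irq * 4;

	*ip = guest_ram[entry]     | (guest_ram[entry + 1] << 8);
	*cs = guest_ram[entry + 2] | (guest_ram[entry + 3] << 8);
}

int main(void)
{
	uint16_t cs, ip;

	/* plant a handler at f000:1234 for vector 0x10 */
	guest_ram[0x10 * 4 + 0] = 0x34;
	guest_ram[0x10 * 4 + 1] = 0x12;
	guest_ram[0x10 * 4 + 2] = 0x00;
	guest_ram[0x10 * 4 + 3] = 0xf0;

	toy_ivt_lookup(0, 0x10, &cs, &ip);
	printf("INT 0x10 -> %04x:%04x\n", (unsigned)cs, (unsigned)ip);
	return 0;
}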
1606 | static int emulate_int(struct x86_emulate_ctxt *ctxt, | 1570 | static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) |
1607 | struct x86_emulate_ops *ops, int irq) | ||
1608 | { | 1571 | { |
1609 | switch(ctxt->mode) { | 1572 | switch(ctxt->mode) { |
1610 | case X86EMUL_MODE_REAL: | 1573 | case X86EMUL_MODE_REAL: |
1611 | return emulate_int_real(ctxt, ops, irq); | 1574 | return emulate_int_real(ctxt, irq); |
1612 | case X86EMUL_MODE_VM86: | 1575 | case X86EMUL_MODE_VM86: |
1613 | case X86EMUL_MODE_PROT16: | 1576 | case X86EMUL_MODE_PROT16: |
1614 | case X86EMUL_MODE_PROT32: | 1577 | case X86EMUL_MODE_PROT32: |
@@ -1619,10 +1582,8 @@ static int emulate_int(struct x86_emulate_ctxt *ctxt, | |||
1619 | } | 1582 | } |
1620 | } | 1583 | } |
1621 | 1584 | ||
1622 | static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | 1585 | static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) |
1623 | struct x86_emulate_ops *ops) | ||
1624 | { | 1586 | { |
1625 | struct decode_cache *c = &ctxt->decode; | ||
1626 | int rc = X86EMUL_CONTINUE; | 1587 | int rc = X86EMUL_CONTINUE; |
1627 | unsigned long temp_eip = 0; | 1588 | unsigned long temp_eip = 0; |
1628 | unsigned long temp_eflags = 0; | 1589 | unsigned long temp_eflags = 0; |
@@ -1634,7 +1595,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1634 | 1595 | ||
1635 | /* TODO: Add stack limit check */ | 1596 | /* TODO: Add stack limit check */ |
1636 | 1597 | ||
1637 | rc = emulate_pop(ctxt, &temp_eip, c->op_bytes); | 1598 | rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes); |
1638 | 1599 | ||
1639 | if (rc != X86EMUL_CONTINUE) | 1600 | if (rc != X86EMUL_CONTINUE) |
1640 | return rc; | 1601 | return rc; |
@@ -1642,27 +1603,27 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1642 | if (temp_eip & ~0xffff) | 1603 | if (temp_eip & ~0xffff) |
1643 | return emulate_gp(ctxt, 0); | 1604 | return emulate_gp(ctxt, 0); |
1644 | 1605 | ||
1645 | rc = emulate_pop(ctxt, &cs, c->op_bytes); | 1606 | rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); |
1646 | 1607 | ||
1647 | if (rc != X86EMUL_CONTINUE) | 1608 | if (rc != X86EMUL_CONTINUE) |
1648 | return rc; | 1609 | return rc; |
1649 | 1610 | ||
1650 | rc = emulate_pop(ctxt, &temp_eflags, c->op_bytes); | 1611 | rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes); |
1651 | 1612 | ||
1652 | if (rc != X86EMUL_CONTINUE) | 1613 | if (rc != X86EMUL_CONTINUE) |
1653 | return rc; | 1614 | return rc; |
1654 | 1615 | ||
1655 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); | 1616 | rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); |
1656 | 1617 | ||
1657 | if (rc != X86EMUL_CONTINUE) | 1618 | if (rc != X86EMUL_CONTINUE) |
1658 | return rc; | 1619 | return rc; |
1659 | 1620 | ||
1660 | c->eip = temp_eip; | 1621 | ctxt->_eip = temp_eip; |
1661 | 1622 | ||
1662 | 1623 | ||
1663 | if (c->op_bytes == 4) | 1624 | if (ctxt->op_bytes == 4) |
1664 | ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); | 1625 | ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); |
1665 | else if (c->op_bytes == 2) { | 1626 | else if (ctxt->op_bytes == 2) { |
1666 | ctxt->eflags &= ~0xffff; | 1627 | ctxt->eflags &= ~0xffff; |
1667 | ctxt->eflags |= temp_eflags; | 1628 | ctxt->eflags |= temp_eflags; |
1668 | } | 1629 | } |
@@ -1673,12 +1634,11 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, | |||
1673 | return rc; | 1634 | return rc; |
1674 | } | 1635 | } |
1675 | 1636 | ||
1676 | static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, | 1637 | static int em_iret(struct x86_emulate_ctxt *ctxt) |
1677 | struct x86_emulate_ops* ops) | ||
1678 | { | 1638 | { |
1679 | switch(ctxt->mode) { | 1639 | switch(ctxt->mode) { |
1680 | case X86EMUL_MODE_REAL: | 1640 | case X86EMUL_MODE_REAL: |
1681 | return emulate_iret_real(ctxt, ops); | 1641 | return emulate_iret_real(ctxt); |
1682 | case X86EMUL_MODE_VM86: | 1642 | case X86EMUL_MODE_VM86: |
1683 | case X86EMUL_MODE_PROT16: | 1643 | case X86EMUL_MODE_PROT16: |
1684 | case X86EMUL_MODE_PROT32: | 1644 | case X86EMUL_MODE_PROT32: |
@@ -1691,53 +1651,49 @@ static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, | |||
1691 | 1651 | ||
1692 | static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | 1652 | static int em_jmp_far(struct x86_emulate_ctxt *ctxt) |
1693 | { | 1653 | { |
1694 | struct decode_cache *c = &ctxt->decode; | ||
1695 | int rc; | 1654 | int rc; |
1696 | unsigned short sel; | 1655 | unsigned short sel; |
1697 | 1656 | ||
1698 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | 1657 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); |
1699 | 1658 | ||
1700 | rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); | 1659 | rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS); |
1701 | if (rc != X86EMUL_CONTINUE) | 1660 | if (rc != X86EMUL_CONTINUE) |
1702 | return rc; | 1661 | return rc; |
1703 | 1662 | ||
1704 | c->eip = 0; | 1663 | ctxt->_eip = 0; |
1705 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | 1664 | memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); |
1706 | return X86EMUL_CONTINUE; | 1665 | return X86EMUL_CONTINUE; |
1707 | } | 1666 | } |
1708 | 1667 | ||
1709 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) | 1668 | static int em_grp1a(struct x86_emulate_ctxt *ctxt) |
1710 | { | 1669 | { |
1711 | struct decode_cache *c = &ctxt->decode; | 1670 | return emulate_pop(ctxt, &ctxt->dst.val, ctxt->dst.bytes); |
1712 | |||
1713 | return emulate_pop(ctxt, &c->dst.val, c->dst.bytes); | ||
1714 | } | 1671 | } |
1715 | 1672 | ||
1716 | static int em_grp2(struct x86_emulate_ctxt *ctxt) | 1673 | static int em_grp2(struct x86_emulate_ctxt *ctxt) |
1717 | { | 1674 | { |
1718 | struct decode_cache *c = &ctxt->decode; | 1675 | switch (ctxt->modrm_reg) { |
1719 | switch (c->modrm_reg) { | ||
1720 | case 0: /* rol */ | 1676 | case 0: /* rol */ |
1721 | emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags); | 1677 | emulate_2op_SrcB("rol", ctxt->src, ctxt->dst, ctxt->eflags); |
1722 | break; | 1678 | break; |
1723 | case 1: /* ror */ | 1679 | case 1: /* ror */ |
1724 | emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags); | 1680 | emulate_2op_SrcB("ror", ctxt->src, ctxt->dst, ctxt->eflags); |
1725 | break; | 1681 | break; |
1726 | case 2: /* rcl */ | 1682 | case 2: /* rcl */ |
1727 | emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags); | 1683 | emulate_2op_SrcB("rcl", ctxt->src, ctxt->dst, ctxt->eflags); |
1728 | break; | 1684 | break; |
1729 | case 3: /* rcr */ | 1685 | case 3: /* rcr */ |
1730 | emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags); | 1686 | emulate_2op_SrcB("rcr", ctxt->src, ctxt->dst, ctxt->eflags); |
1731 | break; | 1687 | break; |
1732 | case 4: /* sal/shl */ | 1688 | case 4: /* sal/shl */ |
1733 | case 6: /* sal/shl */ | 1689 | case 6: /* sal/shl */ |
1734 | emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags); | 1690 | emulate_2op_SrcB("sal", ctxt->src, ctxt->dst, ctxt->eflags); |
1735 | break; | 1691 | break; |
1736 | case 5: /* shr */ | 1692 | case 5: /* shr */ |
1737 | emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags); | 1693 | emulate_2op_SrcB("shr", ctxt->src, ctxt->dst, ctxt->eflags); |
1738 | break; | 1694 | break; |
1739 | case 7: /* sar */ | 1695 | case 7: /* sar */ |
1740 | emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags); | 1696 | emulate_2op_SrcB("sar", ctxt->src, ctxt->dst, ctxt->eflags); |
1741 | break; | 1697 | break; |
1742 | } | 1698 | } |
1743 | return X86EMUL_CONTINUE; | 1699 | return X86EMUL_CONTINUE; |
@@ -1745,33 +1701,32 @@ static int em_grp2(struct x86_emulate_ctxt *ctxt) | |||
1745 | 1701 | ||
1746 | static int em_grp3(struct x86_emulate_ctxt *ctxt) | 1702 | static int em_grp3(struct x86_emulate_ctxt *ctxt) |
1747 | { | 1703 | { |
1748 | struct decode_cache *c = &ctxt->decode; | 1704 | unsigned long *rax = &ctxt->regs[VCPU_REGS_RAX]; |
1749 | unsigned long *rax = &c->regs[VCPU_REGS_RAX]; | 1705 | unsigned long *rdx = &ctxt->regs[VCPU_REGS_RDX]; |
1750 | unsigned long *rdx = &c->regs[VCPU_REGS_RDX]; | ||
1751 | u8 de = 0; | 1706 | u8 de = 0; |
1752 | 1707 | ||
1753 | switch (c->modrm_reg) { | 1708 | switch (ctxt->modrm_reg) { |
1754 | case 0 ... 1: /* test */ | 1709 | case 0 ... 1: /* test */ |
1755 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 1710 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); |
1756 | break; | 1711 | break; |
1757 | case 2: /* not */ | 1712 | case 2: /* not */ |
1758 | c->dst.val = ~c->dst.val; | 1713 | ctxt->dst.val = ~ctxt->dst.val; |
1759 | break; | 1714 | break; |
1760 | case 3: /* neg */ | 1715 | case 3: /* neg */ |
1761 | emulate_1op("neg", c->dst, ctxt->eflags); | 1716 | emulate_1op("neg", ctxt->dst, ctxt->eflags); |
1762 | break; | 1717 | break; |
1763 | case 4: /* mul */ | 1718 | case 4: /* mul */ |
1764 | emulate_1op_rax_rdx("mul", c->src, *rax, *rdx, ctxt->eflags); | 1719 | emulate_1op_rax_rdx("mul", ctxt->src, *rax, *rdx, ctxt->eflags); |
1765 | break; | 1720 | break; |
1766 | case 5: /* imul */ | 1721 | case 5: /* imul */ |
1767 | emulate_1op_rax_rdx("imul", c->src, *rax, *rdx, ctxt->eflags); | 1722 | emulate_1op_rax_rdx("imul", ctxt->src, *rax, *rdx, ctxt->eflags); |
1768 | break; | 1723 | break; |
1769 | case 6: /* div */ | 1724 | case 6: /* div */ |
1770 | emulate_1op_rax_rdx_ex("div", c->src, *rax, *rdx, | 1725 | emulate_1op_rax_rdx_ex("div", ctxt->src, *rax, *rdx, |
1771 | ctxt->eflags, de); | 1726 | ctxt->eflags, de); |
1772 | break; | 1727 | break; |
1773 | case 7: /* idiv */ | 1728 | case 7: /* idiv */ |
1774 | emulate_1op_rax_rdx_ex("idiv", c->src, *rax, *rdx, | 1729 | emulate_1op_rax_rdx_ex("idiv", ctxt->src, *rax, *rdx, |
1775 | ctxt->eflags, de); | 1730 | ctxt->eflags, de); |
1776 | break; | 1731 | break; |
1777 | default: | 1732 | default: |
@@ -1784,26 +1739,25 @@ static int em_grp3(struct x86_emulate_ctxt *ctxt) | |||
1784 | 1739 | ||
1785 | static int em_grp45(struct x86_emulate_ctxt *ctxt) | 1740 | static int em_grp45(struct x86_emulate_ctxt *ctxt) |
1786 | { | 1741 | { |
1787 | struct decode_cache *c = &ctxt->decode; | ||
1788 | int rc = X86EMUL_CONTINUE; | 1742 | int rc = X86EMUL_CONTINUE; |
1789 | 1743 | ||
1790 | switch (c->modrm_reg) { | 1744 | switch (ctxt->modrm_reg) { |
1791 | case 0: /* inc */ | 1745 | case 0: /* inc */ |
1792 | emulate_1op("inc", c->dst, ctxt->eflags); | 1746 | emulate_1op("inc", ctxt->dst, ctxt->eflags); |
1793 | break; | 1747 | break; |
1794 | case 1: /* dec */ | 1748 | case 1: /* dec */ |
1795 | emulate_1op("dec", c->dst, ctxt->eflags); | 1749 | emulate_1op("dec", ctxt->dst, ctxt->eflags); |
1796 | break; | 1750 | break; |
1797 | case 2: /* call near abs */ { | 1751 | case 2: /* call near abs */ { |
1798 | long int old_eip; | 1752 | long int old_eip; |
1799 | old_eip = c->eip; | 1753 | old_eip = ctxt->_eip; |
1800 | c->eip = c->src.val; | 1754 | ctxt->_eip = ctxt->src.val; |
1801 | c->src.val = old_eip; | 1755 | ctxt->src.val = old_eip; |
1802 | rc = em_push(ctxt); | 1756 | rc = em_push(ctxt); |
1803 | break; | 1757 | break; |
1804 | } | 1758 | } |
1805 | case 4: /* jmp abs */ | 1759 | case 4: /* jmp abs */ |
1806 | c->eip = c->src.val; | 1760 | ctxt->_eip = ctxt->src.val; |
1807 | break; | 1761 | break; |
1808 | case 5: /* jmp far */ | 1762 | case 5: /* jmp far */ |
1809 | rc = em_jmp_far(ctxt); | 1763 | rc = em_jmp_far(ctxt); |
@@ -1817,68 +1771,70 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) | |||
1817 | 1771 | ||
1818 | static int em_grp9(struct x86_emulate_ctxt *ctxt) | 1772 | static int em_grp9(struct x86_emulate_ctxt *ctxt) |
1819 | { | 1773 | { |
1820 | struct decode_cache *c = &ctxt->decode; | 1774 | u64 old = ctxt->dst.orig_val64; |
1821 | u64 old = c->dst.orig_val64; | ||
1822 | 1775 | ||
1823 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1776 | if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) || |
1824 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1777 | ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) { |
1825 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1778 | ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
1826 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1779 | ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
1827 | ctxt->eflags &= ~EFLG_ZF; | 1780 | ctxt->eflags &= ~EFLG_ZF; |
1828 | } else { | 1781 | } else { |
1829 | c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1782 | ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) | |
1830 | (u32) c->regs[VCPU_REGS_RBX]; | 1783 | (u32) ctxt->regs[VCPU_REGS_RBX]; |
1831 | 1784 | ||
1832 | ctxt->eflags |= EFLG_ZF; | 1785 | ctxt->eflags |= EFLG_ZF; |
1833 | } | 1786 | } |
1834 | return X86EMUL_CONTINUE; | 1787 | return X86EMUL_CONTINUE; |
1835 | } | 1788 | } |
1836 | 1789 | ||
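em_grp9() above is the cmpxchg8b path: compare EDX:EAX with the 64-bit destination, and on mismatch load the destination into EDX:EAX and clear ZF, on match store ECX:EBX and set ZF. The same semantics as a standalone model with invented register values:

#include <stdint.h>
#include <stdio.h>

struct toy_regs {
	uint32_t eax, ebx, ecx, edx;
	int zf;
};

static void toy_cmpxchg8b(struct toy_regs *r, uint64_t *mem)
{
	uint64_t old = *mem;

	if ((uint32_t)old != r->eax || (uint32_t)(old >> 32) != r->edx) {
		r->eax = (uint32_t)old;		/* mismatch: publish current value */
		r->edx = (uint32_t)(old >> 32);
		r->zf  = 0;
	} else {
		*mem  = ((uint64_t)r->ecx << 32) | r->ebx;	/* match: swap in ECX:EBX */
		r->zf = 1;
	}
}

int main(void)
{
	uint64_t mem = 0x1122334455667788ULL;
	struct toy_regs r = {
		.eax = 0x55667788, .edx = 0x11223344,
		.ebx = 0xdeadbeef, .ecx = 0x0badc0de,
	};

	toy_cmpxchg8b(&r, &mem);
	printf("zf=%d mem=%#llx\n", r.zf, (unsigned long long)mem);
	return 0;
}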
1837 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1790 | static int em_ret(struct x86_emulate_ctxt *ctxt) |
1838 | struct x86_emulate_ops *ops) | 1791 | { |
1792 | ctxt->dst.type = OP_REG; | ||
1793 | ctxt->dst.addr.reg = &ctxt->_eip; | ||
1794 | ctxt->dst.bytes = ctxt->op_bytes; | ||
1795 | return em_pop(ctxt); | ||
1796 | } | ||
1797 | |||
1798 | static int em_ret_far(struct x86_emulate_ctxt *ctxt) | ||
1839 | { | 1799 | { |
1840 | struct decode_cache *c = &ctxt->decode; | ||
1841 | int rc; | 1800 | int rc; |
1842 | unsigned long cs; | 1801 | unsigned long cs; |
1843 | 1802 | ||
1844 | rc = emulate_pop(ctxt, &c->eip, c->op_bytes); | 1803 | rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); |
1845 | if (rc != X86EMUL_CONTINUE) | 1804 | if (rc != X86EMUL_CONTINUE) |
1846 | return rc; | 1805 | return rc; |
1847 | if (c->op_bytes == 4) | 1806 | if (ctxt->op_bytes == 4) |
1848 | c->eip = (u32)c->eip; | 1807 | ctxt->_eip = (u32)ctxt->_eip; |
1849 | rc = emulate_pop(ctxt, &cs, c->op_bytes); | 1808 | rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); |
1850 | if (rc != X86EMUL_CONTINUE) | 1809 | if (rc != X86EMUL_CONTINUE) |
1851 | return rc; | 1810 | return rc; |
1852 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); | 1811 | rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); |
1853 | return rc; | 1812 | return rc; |
1854 | } | 1813 | } |
1855 | 1814 | ||
1856 | static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, | 1815 | static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, int seg) |
1857 | struct x86_emulate_ops *ops, int seg) | ||
1858 | { | 1816 | { |
1859 | struct decode_cache *c = &ctxt->decode; | ||
1860 | unsigned short sel; | 1817 | unsigned short sel; |
1861 | int rc; | 1818 | int rc; |
1862 | 1819 | ||
1863 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | 1820 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); |
1864 | 1821 | ||
1865 | rc = load_segment_descriptor(ctxt, ops, sel, seg); | 1822 | rc = load_segment_descriptor(ctxt, sel, seg); |
1866 | if (rc != X86EMUL_CONTINUE) | 1823 | if (rc != X86EMUL_CONTINUE) |
1867 | return rc; | 1824 | return rc; |
1868 | 1825 | ||
1869 | c->dst.val = c->src.val; | 1826 | ctxt->dst.val = ctxt->src.val; |
1870 | return rc; | 1827 | return rc; |
1871 | } | 1828 | } |
1872 | 1829 | ||
1873 | static inline void | 1830 | static void |
1874 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 1831 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
1875 | struct x86_emulate_ops *ops, struct desc_struct *cs, | 1832 | struct desc_struct *cs, struct desc_struct *ss) |
1876 | struct desc_struct *ss) | ||
1877 | { | 1833 | { |
1878 | u16 selector; | 1834 | u16 selector; |
1879 | 1835 | ||
1880 | memset(cs, 0, sizeof(struct desc_struct)); | 1836 | memset(cs, 0, sizeof(struct desc_struct)); |
1881 | ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); | 1837 | ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); |
1882 | memset(ss, 0, sizeof(struct desc_struct)); | 1838 | memset(ss, 0, sizeof(struct desc_struct)); |
1883 | 1839 | ||
1884 | cs->l = 0; /* will be adjusted later */ | 1840 | cs->l = 0; /* will be adjusted later */ |
@@ -1901,10 +1857,9 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1901 | ss->p = 1; | 1857 | ss->p = 1; |
1902 | } | 1858 | } |
1903 | 1859 | ||
1904 | static int | 1860 | static int em_syscall(struct x86_emulate_ctxt *ctxt) |
1905 | emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | ||
1906 | { | 1861 | { |
1907 | struct decode_cache *c = &ctxt->decode; | 1862 | struct x86_emulate_ops *ops = ctxt->ops; |
1908 | struct desc_struct cs, ss; | 1863 | struct desc_struct cs, ss; |
1909 | u64 msr_data; | 1864 | u64 msr_data; |
1910 | u16 cs_sel, ss_sel; | 1865 | u16 cs_sel, ss_sel; |
@@ -1916,7 +1871,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1916 | return emulate_ud(ctxt); | 1871 | return emulate_ud(ctxt); |
1917 | 1872 | ||
1918 | ops->get_msr(ctxt, MSR_EFER, &efer); | 1873 | ops->get_msr(ctxt, MSR_EFER, &efer); |
1919 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1874 | setup_syscalls_segments(ctxt, &cs, &ss); |
1920 | 1875 | ||
1921 | ops->get_msr(ctxt, MSR_STAR, &msr_data); | 1876 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1922 | msr_data >>= 32; | 1877 | msr_data >>= 32; |
@@ -1930,15 +1885,15 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1930 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); | 1885 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
1931 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 1886 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
1932 | 1887 | ||
1933 | c->regs[VCPU_REGS_RCX] = c->eip; | 1888 | ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip; |
1934 | if (efer & EFER_LMA) { | 1889 | if (efer & EFER_LMA) { |
1935 | #ifdef CONFIG_X86_64 | 1890 | #ifdef CONFIG_X86_64 |
1936 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1891 | ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
1937 | 1892 | ||
1938 | ops->get_msr(ctxt, | 1893 | ops->get_msr(ctxt, |
1939 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1894 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
1940 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1895 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
1941 | c->eip = msr_data; | 1896 | ctxt->_eip = msr_data; |
1942 | 1897 | ||
1943 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); | 1898 | ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); |
1944 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1899 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
@@ -1946,7 +1901,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1946 | } else { | 1901 | } else { |
1947 | /* legacy mode */ | 1902 | /* legacy mode */ |
1948 | ops->get_msr(ctxt, MSR_STAR, &msr_data); | 1903 | ops->get_msr(ctxt, MSR_STAR, &msr_data); |
1949 | c->eip = (u32)msr_data; | 1904 | ctxt->_eip = (u32)msr_data; |
1950 | 1905 | ||
1951 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1906 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1952 | } | 1907 | } |
@@ -1954,16 +1909,15 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1954 | return X86EMUL_CONTINUE; | 1909 | return X86EMUL_CONTINUE; |
1955 | } | 1910 | } |
1956 | 1911 | ||
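em_syscall() pulls its target state out of MSRs: STAR[47:32] supplies the CS selector (SS is the next GDT slot), LSTAR or CSTAR the entry RIP depending on long vs. compat mode, and SYSCALL_MASK the RFLAGS bits to clear, while RCX and R11 receive the return RIP and RFLAGS. The selector computation itself is in context not shown in this hunk; the sketch below follows the architectural layout with invented MSR values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t star   = 0x0023001000000000ULL;	/* hypothetical MSR_STAR        */
	uint64_t lstar  = 0xffffffff81000000ULL;	/* hypothetical MSR_LSTAR       */
	uint64_t fmask  = 0x0000000000000700ULL;	/* hypothetical MSR_SYSCALL_MASK */
	uint64_t rflags = 0x0000000000000346ULL;

	uint16_t cs_sel = (uint16_t)((star >> 32) & 0xfffc);	/* STAR[47:32]   */
	uint16_t ss_sel = (uint16_t)(cs_sel + 8);		/* next GDT slot */

	printf("CS=%#x SS=%#x entry=%#llx rflags after mask=%#llx\n",
	       (unsigned)cs_sel, (unsigned)ss_sel,
	       (unsigned long long)lstar,
	       (unsigned long long)(rflags & ~fmask));
	return 0;
}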
1957 | static int | 1912 | static int em_sysenter(struct x86_emulate_ctxt *ctxt) |
1958 | emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | ||
1959 | { | 1913 | { |
1960 | struct decode_cache *c = &ctxt->decode; | 1914 | struct x86_emulate_ops *ops = ctxt->ops; |
1961 | struct desc_struct cs, ss; | 1915 | struct desc_struct cs, ss; |
1962 | u64 msr_data; | 1916 | u64 msr_data; |
1963 | u16 cs_sel, ss_sel; | 1917 | u16 cs_sel, ss_sel; |
1964 | u64 efer = 0; | 1918 | u64 efer = 0; |
1965 | 1919 | ||
1966 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 1920 | ops->get_msr(ctxt, MSR_EFER, &efer); |
1967 | /* inject #GP if in real mode */ | 1921 | /* inject #GP if in real mode */ |
1968 | if (ctxt->mode == X86EMUL_MODE_REAL) | 1922 | if (ctxt->mode == X86EMUL_MODE_REAL) |
1969 | return emulate_gp(ctxt, 0); | 1923 | return emulate_gp(ctxt, 0); |
@@ -1974,7 +1928,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1974 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1928 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1975 | return emulate_ud(ctxt); | 1929 | return emulate_ud(ctxt); |
1976 | 1930 | ||
1977 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1931 | setup_syscalls_segments(ctxt, &cs, &ss); |
1978 | 1932 | ||
1979 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); | 1933 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); |
1980 | switch (ctxt->mode) { | 1934 | switch (ctxt->mode) { |
@@ -2002,31 +1956,30 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2002 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 1956 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
2003 | 1957 | ||
2004 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); | 1958 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); |
2005 | c->eip = msr_data; | 1959 | ctxt->_eip = msr_data; |
2006 | 1960 | ||
2007 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); | 1961 | ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); |
2008 | c->regs[VCPU_REGS_RSP] = msr_data; | 1962 | ctxt->regs[VCPU_REGS_RSP] = msr_data; |
2009 | 1963 | ||
2010 | return X86EMUL_CONTINUE; | 1964 | return X86EMUL_CONTINUE; |
2011 | } | 1965 | } |
2012 | 1966 | ||
2013 | static int | 1967 | static int em_sysexit(struct x86_emulate_ctxt *ctxt) |
2014 | emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | ||
2015 | { | 1968 | { |
2016 | struct decode_cache *c = &ctxt->decode; | 1969 | struct x86_emulate_ops *ops = ctxt->ops; |
2017 | struct desc_struct cs, ss; | 1970 | struct desc_struct cs, ss; |
2018 | u64 msr_data; | 1971 | u64 msr_data; |
2019 | int usermode; | 1972 | int usermode; |
2020 | u16 cs_sel, ss_sel; | 1973 | u16 cs_sel = 0, ss_sel = 0; |
2021 | 1974 | ||
2022 | /* inject #GP if in real mode or Virtual 8086 mode */ | 1975 | /* inject #GP if in real mode or Virtual 8086 mode */ |
2023 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1976 | if (ctxt->mode == X86EMUL_MODE_REAL || |
2024 | ctxt->mode == X86EMUL_MODE_VM86) | 1977 | ctxt->mode == X86EMUL_MODE_VM86) |
2025 | return emulate_gp(ctxt, 0); | 1978 | return emulate_gp(ctxt, 0); |
2026 | 1979 | ||
2027 | setup_syscalls_segments(ctxt, ops, &cs, &ss); | 1980 | setup_syscalls_segments(ctxt, &cs, &ss); |
2028 | 1981 | ||
2029 | if ((c->rex_prefix & 0x8) != 0x0) | 1982 | if ((ctxt->rex_prefix & 0x8) != 0x0) |
2030 | usermode = X86EMUL_MODE_PROT64; | 1983 | usermode = X86EMUL_MODE_PROT64; |
2031 | else | 1984 | else |
2032 | usermode = X86EMUL_MODE_PROT32; | 1985 | usermode = X86EMUL_MODE_PROT32; |
@@ -2056,14 +2009,13 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2056 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); | 2009 | ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); |
2057 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); | 2010 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); |
2058 | 2011 | ||
2059 | c->eip = c->regs[VCPU_REGS_RDX]; | 2012 | ctxt->_eip = ctxt->regs[VCPU_REGS_RDX]; |
2060 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; | 2013 | ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX]; |
2061 | 2014 | ||
2062 | return X86EMUL_CONTINUE; | 2015 | return X86EMUL_CONTINUE; |
2063 | } | 2016 | } |
2064 | 2017 | ||
2065 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, | 2018 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) |
2066 | struct x86_emulate_ops *ops) | ||
2067 | { | 2019 | { |
2068 | int iopl; | 2020 | int iopl; |
2069 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
@@ -2071,13 +2023,13 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, | |||
2071 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
2072 | return true; | 2024 | return true; |
2073 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
2074 | return ops->cpl(ctxt) > iopl; | 2026 | return ctxt->ops->cpl(ctxt) > iopl; |
2075 | } | 2027 | } |
2076 | 2028 | ||
2077 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
2078 | struct x86_emulate_ops *ops, | ||
2079 | u16 port, u16 len) | 2030 | u16 port, u16 len) |
2080 | { | 2031 | { |
2032 | struct x86_emulate_ops *ops = ctxt->ops; | ||
2081 | struct desc_struct tr_seg; | 2033 | struct desc_struct tr_seg; |
2082 | u32 base3; | 2034 | u32 base3; |
2083 | int r; | 2035 | int r; |
@@ -2108,14 +2060,13 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
2108 | } | 2060 | } |
2109 | 2061 | ||
2110 | static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | 2062 | static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, |
2111 | struct x86_emulate_ops *ops, | ||
2112 | u16 port, u16 len) | 2063 | u16 port, u16 len) |
2113 | { | 2064 | { |
2114 | if (ctxt->perm_ok) | 2065 | if (ctxt->perm_ok) |
2115 | return true; | 2066 | return true; |
2116 | 2067 | ||
2117 | if (emulator_bad_iopl(ctxt, ops)) | 2068 | if (emulator_bad_iopl(ctxt)) |
2118 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2069 | if (!emulator_io_port_access_allowed(ctxt, port, len)) |
2119 | return false; | 2070 | return false; |
2120 | 2071 | ||
2121 | ctxt->perm_ok = true; | 2072 | ctxt->perm_ok = true; |
@@ -2124,21 +2075,18 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
2124 | } | 2075 | } |
2125 | 2076 | ||
2126 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | 2077 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, |
2127 | struct x86_emulate_ops *ops, | ||
2128 | struct tss_segment_16 *tss) | 2078 | struct tss_segment_16 *tss) |
2129 | { | 2079 | { |
2130 | struct decode_cache *c = &ctxt->decode; | 2080 | tss->ip = ctxt->_eip; |
2131 | |||
2132 | tss->ip = c->eip; | ||
2133 | tss->flag = ctxt->eflags; | 2081 | tss->flag = ctxt->eflags; |
2134 | tss->ax = c->regs[VCPU_REGS_RAX]; | 2082 | tss->ax = ctxt->regs[VCPU_REGS_RAX]; |
2135 | tss->cx = c->regs[VCPU_REGS_RCX]; | 2083 | tss->cx = ctxt->regs[VCPU_REGS_RCX]; |
2136 | tss->dx = c->regs[VCPU_REGS_RDX]; | 2084 | tss->dx = ctxt->regs[VCPU_REGS_RDX]; |
2137 | tss->bx = c->regs[VCPU_REGS_RBX]; | 2085 | tss->bx = ctxt->regs[VCPU_REGS_RBX]; |
2138 | tss->sp = c->regs[VCPU_REGS_RSP]; | 2086 | tss->sp = ctxt->regs[VCPU_REGS_RSP]; |
2139 | tss->bp = c->regs[VCPU_REGS_RBP]; | 2087 | tss->bp = ctxt->regs[VCPU_REGS_RBP]; |
2140 | tss->si = c->regs[VCPU_REGS_RSI]; | 2088 | tss->si = ctxt->regs[VCPU_REGS_RSI]; |
2141 | tss->di = c->regs[VCPU_REGS_RDI]; | 2089 | tss->di = ctxt->regs[VCPU_REGS_RDI]; |
2142 | 2090 | ||
2143 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); | 2091 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
2144 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); | 2092 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
@@ -2148,22 +2096,20 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | |||
2148 | } | 2096 | } |
2149 | 2097 | ||
2150 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | 2098 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, |
2151 | struct x86_emulate_ops *ops, | ||
2152 | struct tss_segment_16 *tss) | 2099 | struct tss_segment_16 *tss) |
2153 | { | 2100 | { |
2154 | struct decode_cache *c = &ctxt->decode; | ||
2155 | int ret; | 2101 | int ret; |
2156 | 2102 | ||
2157 | c->eip = tss->ip; | 2103 | ctxt->_eip = tss->ip; |
2158 | ctxt->eflags = tss->flag | 2; | 2104 | ctxt->eflags = tss->flag | 2; |
2159 | c->regs[VCPU_REGS_RAX] = tss->ax; | 2105 | ctxt->regs[VCPU_REGS_RAX] = tss->ax; |
2160 | c->regs[VCPU_REGS_RCX] = tss->cx; | 2106 | ctxt->regs[VCPU_REGS_RCX] = tss->cx; |
2161 | c->regs[VCPU_REGS_RDX] = tss->dx; | 2107 | ctxt->regs[VCPU_REGS_RDX] = tss->dx; |
2162 | c->regs[VCPU_REGS_RBX] = tss->bx; | 2108 | ctxt->regs[VCPU_REGS_RBX] = tss->bx; |
2163 | c->regs[VCPU_REGS_RSP] = tss->sp; | 2109 | ctxt->regs[VCPU_REGS_RSP] = tss->sp; |
2164 | c->regs[VCPU_REGS_RBP] = tss->bp; | 2110 | ctxt->regs[VCPU_REGS_RBP] = tss->bp; |
2165 | c->regs[VCPU_REGS_RSI] = tss->si; | 2111 | ctxt->regs[VCPU_REGS_RSI] = tss->si; |
2166 | c->regs[VCPU_REGS_RDI] = tss->di; | 2112 | ctxt->regs[VCPU_REGS_RDI] = tss->di; |
2167 | 2113 | ||
2168 | /* | 2114 | /* |
2169 | * SDM says that segment selectors are loaded before segment | 2115 | * SDM says that segment selectors are loaded before segment |
@@ -2179,19 +2125,19 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
2179 | * Now load segment descriptors. If fault happens at this stage | 2125 | * Now load segment descriptors. If fault happens at this stage |
2180 | * it is handled in the context of the new task | 2126 | * it is handled in the context of the new task |
2181 | */ | 2127 | */ |
2182 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | 2128 | ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); |
2183 | if (ret != X86EMUL_CONTINUE) | 2129 | if (ret != X86EMUL_CONTINUE) |
2184 | return ret; | 2130 | return ret; |
2185 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | 2131 | ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); |
2186 | if (ret != X86EMUL_CONTINUE) | 2132 | if (ret != X86EMUL_CONTINUE) |
2187 | return ret; | 2133 | return ret; |
2188 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | 2134 | ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); |
2189 | if (ret != X86EMUL_CONTINUE) | 2135 | if (ret != X86EMUL_CONTINUE) |
2190 | return ret; | 2136 | return ret; |
2191 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | 2137 | ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); |
2192 | if (ret != X86EMUL_CONTINUE) | 2138 | if (ret != X86EMUL_CONTINUE) |
2193 | return ret; | 2139 | return ret; |
2194 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | 2140 | ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); |
2195 | if (ret != X86EMUL_CONTINUE) | 2141 | if (ret != X86EMUL_CONTINUE) |
2196 | return ret; | 2142 | return ret; |
2197 | 2143 | ||
@@ -2199,10 +2145,10 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
2199 | } | 2145 | } |
2200 | 2146 | ||
2201 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | 2147 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, |
2202 | struct x86_emulate_ops *ops, | ||
2203 | u16 tss_selector, u16 old_tss_sel, | 2148 | u16 tss_selector, u16 old_tss_sel, |
2204 | ulong old_tss_base, struct desc_struct *new_desc) | 2149 | ulong old_tss_base, struct desc_struct *new_desc) |
2205 | { | 2150 | { |
2151 | struct x86_emulate_ops *ops = ctxt->ops; | ||
2206 | struct tss_segment_16 tss_seg; | 2152 | struct tss_segment_16 tss_seg; |
2207 | int ret; | 2153 | int ret; |
2208 | u32 new_tss_base = get_desc_base(new_desc); | 2154 | u32 new_tss_base = get_desc_base(new_desc); |
@@ -2213,7 +2159,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2213 | /* FIXME: need to provide precise fault address */ | 2159 | /* FIXME: need to provide precise fault address */ |
2214 | return ret; | 2160 | return ret; |
2215 | 2161 | ||
2216 | save_state_to_tss16(ctxt, ops, &tss_seg); | 2162 | save_state_to_tss16(ctxt, &tss_seg); |
2217 | 2163 | ||
2218 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, | 2164 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2219 | &ctxt->exception); | 2165 | &ctxt->exception); |
@@ -2239,26 +2185,23 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2239 | return ret; | 2185 | return ret; |
2240 | } | 2186 | } |
2241 | 2187 | ||
2242 | return load_state_from_tss16(ctxt, ops, &tss_seg); | 2188 | return load_state_from_tss16(ctxt, &tss_seg); |
2243 | } | 2189 | } |
2244 | 2190 | ||
2245 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | 2191 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, |
2246 | struct x86_emulate_ops *ops, | ||
2247 | struct tss_segment_32 *tss) | 2192 | struct tss_segment_32 *tss) |
2248 | { | 2193 | { |
2249 | struct decode_cache *c = &ctxt->decode; | 2194 | tss->cr3 = ctxt->ops->get_cr(ctxt, 3); |
2250 | 2195 | tss->eip = ctxt->_eip; | |
2251 | tss->cr3 = ops->get_cr(ctxt, 3); | ||
2252 | tss->eip = c->eip; | ||
2253 | tss->eflags = ctxt->eflags; | 2196 | tss->eflags = ctxt->eflags; |
2254 | tss->eax = c->regs[VCPU_REGS_RAX]; | 2197 | tss->eax = ctxt->regs[VCPU_REGS_RAX]; |
2255 | tss->ecx = c->regs[VCPU_REGS_RCX]; | 2198 | tss->ecx = ctxt->regs[VCPU_REGS_RCX]; |
2256 | tss->edx = c->regs[VCPU_REGS_RDX]; | 2199 | tss->edx = ctxt->regs[VCPU_REGS_RDX]; |
2257 | tss->ebx = c->regs[VCPU_REGS_RBX]; | 2200 | tss->ebx = ctxt->regs[VCPU_REGS_RBX]; |
2258 | tss->esp = c->regs[VCPU_REGS_RSP]; | 2201 | tss->esp = ctxt->regs[VCPU_REGS_RSP]; |
2259 | tss->ebp = c->regs[VCPU_REGS_RBP]; | 2202 | tss->ebp = ctxt->regs[VCPU_REGS_RBP]; |
2260 | tss->esi = c->regs[VCPU_REGS_RSI]; | 2203 | tss->esi = ctxt->regs[VCPU_REGS_RSI]; |
2261 | tss->edi = c->regs[VCPU_REGS_RDI]; | 2204 | tss->edi = ctxt->regs[VCPU_REGS_RDI]; |
2262 | 2205 | ||
2263 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); | 2206 | tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); |
2264 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); | 2207 | tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
@@ -2270,24 +2213,22 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | |||
2270 | } | 2213 | } |
2271 | 2214 | ||
2272 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | 2215 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, |
2273 | struct x86_emulate_ops *ops, | ||
2274 | struct tss_segment_32 *tss) | 2216 | struct tss_segment_32 *tss) |
2275 | { | 2217 | { |
2276 | struct decode_cache *c = &ctxt->decode; | ||
2277 | int ret; | 2218 | int ret; |
2278 | 2219 | ||
2279 | if (ops->set_cr(ctxt, 3, tss->cr3)) | 2220 | if (ctxt->ops->set_cr(ctxt, 3, tss->cr3)) |
2280 | return emulate_gp(ctxt, 0); | 2221 | return emulate_gp(ctxt, 0); |
2281 | c->eip = tss->eip; | 2222 | ctxt->_eip = tss->eip; |
2282 | ctxt->eflags = tss->eflags | 2; | 2223 | ctxt->eflags = tss->eflags | 2; |
2283 | c->regs[VCPU_REGS_RAX] = tss->eax; | 2224 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; |
2284 | c->regs[VCPU_REGS_RCX] = tss->ecx; | 2225 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; |
2285 | c->regs[VCPU_REGS_RDX] = tss->edx; | 2226 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; |
2286 | c->regs[VCPU_REGS_RBX] = tss->ebx; | 2227 | ctxt->regs[VCPU_REGS_RBX] = tss->ebx; |
2287 | c->regs[VCPU_REGS_RSP] = tss->esp; | 2228 | ctxt->regs[VCPU_REGS_RSP] = tss->esp; |
2288 | c->regs[VCPU_REGS_RBP] = tss->ebp; | 2229 | ctxt->regs[VCPU_REGS_RBP] = tss->ebp; |
2289 | c->regs[VCPU_REGS_RSI] = tss->esi; | 2230 | ctxt->regs[VCPU_REGS_RSI] = tss->esi; |
2290 | c->regs[VCPU_REGS_RDI] = tss->edi; | 2231 | ctxt->regs[VCPU_REGS_RDI] = tss->edi; |
2291 | 2232 | ||
2292 | /* | 2233 | /* |
2293 | * SDM says that segment selectors are loaded before segment | 2234 | * SDM says that segment selectors are loaded before segment |
@@ -2305,25 +2246,25 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2305 | * Now load segment descriptors. If fault happens at this stage | 2246 | * Now load segment descriptors. If fault happens at this stage |
2306 | * it is handled in the context of the new task | 2247 | * it is handled in the context of the new task |
2307 | */ | 2248 | */ |
2308 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | 2249 | ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); |
2309 | if (ret != X86EMUL_CONTINUE) | 2250 | if (ret != X86EMUL_CONTINUE) |
2310 | return ret; | 2251 | return ret; |
2311 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | 2252 | ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); |
2312 | if (ret != X86EMUL_CONTINUE) | 2253 | if (ret != X86EMUL_CONTINUE) |
2313 | return ret; | 2254 | return ret; |
2314 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | 2255 | ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); |
2315 | if (ret != X86EMUL_CONTINUE) | 2256 | if (ret != X86EMUL_CONTINUE) |
2316 | return ret; | 2257 | return ret; |
2317 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | 2258 | ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); |
2318 | if (ret != X86EMUL_CONTINUE) | 2259 | if (ret != X86EMUL_CONTINUE) |
2319 | return ret; | 2260 | return ret; |
2320 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | 2261 | ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); |
2321 | if (ret != X86EMUL_CONTINUE) | 2262 | if (ret != X86EMUL_CONTINUE) |
2322 | return ret; | 2263 | return ret; |
2323 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | 2264 | ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS); |
2324 | if (ret != X86EMUL_CONTINUE) | 2265 | if (ret != X86EMUL_CONTINUE) |
2325 | return ret; | 2266 | return ret; |
2326 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | 2267 | ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS); |
2327 | if (ret != X86EMUL_CONTINUE) | 2268 | if (ret != X86EMUL_CONTINUE) |
2328 | return ret; | 2269 | return ret; |
2329 | 2270 | ||
@@ -2331,10 +2272,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2331 | } | 2272 | } |
2332 | 2273 | ||
2333 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | 2274 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, |
2334 | struct x86_emulate_ops *ops, | ||
2335 | u16 tss_selector, u16 old_tss_sel, | 2275 | u16 tss_selector, u16 old_tss_sel, |
2336 | ulong old_tss_base, struct desc_struct *new_desc) | 2276 | ulong old_tss_base, struct desc_struct *new_desc) |
2337 | { | 2277 | { |
2278 | struct x86_emulate_ops *ops = ctxt->ops; | ||
2338 | struct tss_segment_32 tss_seg; | 2279 | struct tss_segment_32 tss_seg; |
2339 | int ret; | 2280 | int ret; |
2340 | u32 new_tss_base = get_desc_base(new_desc); | 2281 | u32 new_tss_base = get_desc_base(new_desc); |
@@ -2345,7 +2286,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2345 | /* FIXME: need to provide precise fault address */ | 2286 | /* FIXME: need to provide precise fault address */ |
2346 | return ret; | 2287 | return ret; |
2347 | 2288 | ||
2348 | save_state_to_tss32(ctxt, ops, &tss_seg); | 2289 | save_state_to_tss32(ctxt, &tss_seg); |
2349 | 2290 | ||
2350 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, | 2291 | ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, |
2351 | &ctxt->exception); | 2292 | &ctxt->exception); |
@@ -2371,14 +2312,14 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2371 | return ret; | 2312 | return ret; |
2372 | } | 2313 | } |
2373 | 2314 | ||
2374 | return load_state_from_tss32(ctxt, ops, &tss_seg); | 2315 | return load_state_from_tss32(ctxt, &tss_seg); |
2375 | } | 2316 | } |
2376 | 2317 | ||
2377 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | 2318 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, |
2378 | struct x86_emulate_ops *ops, | ||
2379 | u16 tss_selector, int reason, | 2319 | u16 tss_selector, int reason, |
2380 | bool has_error_code, u32 error_code) | 2320 | bool has_error_code, u32 error_code) |
2381 | { | 2321 | { |
2322 | struct x86_emulate_ops *ops = ctxt->ops; | ||
2382 | struct desc_struct curr_tss_desc, next_tss_desc; | 2323 | struct desc_struct curr_tss_desc, next_tss_desc; |
2383 | int ret; | 2324 | int ret; |
2384 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); | 2325 | u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); |
@@ -2388,10 +2329,10 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2388 | 2329 | ||
2389 | /* FIXME: old_tss_base == ~0 ? */ | 2330 | /* FIXME: old_tss_base == ~0 ? */ |
2390 | 2331 | ||
2391 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | 2332 | ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc); |
2392 | if (ret != X86EMUL_CONTINUE) | 2333 | if (ret != X86EMUL_CONTINUE) |
2393 | return ret; | 2334 | return ret; |
2394 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | 2335 | ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); |
2395 | if (ret != X86EMUL_CONTINUE) | 2336 | if (ret != X86EMUL_CONTINUE) |
2396 | return ret; | 2337 | return ret; |
2397 | 2338 | ||
@@ -2413,8 +2354,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2413 | 2354 | ||
2414 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 2355 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
2415 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | 2356 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ |
2416 | write_segment_descriptor(ctxt, ops, old_tss_sel, | 2357 | write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); |
2417 | &curr_tss_desc); | ||
2418 | } | 2358 | } |
2419 | 2359 | ||
2420 | if (reason == TASK_SWITCH_IRET) | 2360 | if (reason == TASK_SWITCH_IRET) |
@@ -2426,10 +2366,10 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2426 | old_tss_sel = 0xffff; | 2366 | old_tss_sel = 0xffff; |
2427 | 2367 | ||
2428 | if (next_tss_desc.type & 8) | 2368 | if (next_tss_desc.type & 8) |
2429 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | 2369 | ret = task_switch_32(ctxt, tss_selector, old_tss_sel, |
2430 | old_tss_base, &next_tss_desc); | 2370 | old_tss_base, &next_tss_desc); |
2431 | else | 2371 | else |
2432 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | 2372 | ret = task_switch_16(ctxt, tss_selector, old_tss_sel, |
2433 | old_tss_base, &next_tss_desc); | 2373 | old_tss_base, &next_tss_desc); |
2434 | if (ret != X86EMUL_CONTINUE) | 2374 | if (ret != X86EMUL_CONTINUE) |
2435 | return ret; | 2375 | return ret; |
@@ -2439,19 +2379,16 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2439 | 2379 | ||
2440 | if (reason != TASK_SWITCH_IRET) { | 2380 | if (reason != TASK_SWITCH_IRET) { |
2441 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | 2381 | next_tss_desc.type |= (1 << 1); /* set busy flag */ |
2442 | write_segment_descriptor(ctxt, ops, tss_selector, | 2382 | write_segment_descriptor(ctxt, tss_selector, &next_tss_desc); |
2443 | &next_tss_desc); | ||
2444 | } | 2383 | } |
2445 | 2384 | ||
2446 | ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); | 2385 | ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); |
2447 | ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); | 2386 | ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR); |
2448 | 2387 | ||
2449 | if (has_error_code) { | 2388 | if (has_error_code) { |
2450 | struct decode_cache *c = &ctxt->decode; | 2389 | ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
2451 | 2390 | ctxt->lock_prefix = 0; | |
2452 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2391 | ctxt->src.val = (unsigned long) error_code; |
2453 | c->lock_prefix = 0; | ||
2454 | c->src.val = (unsigned long) error_code; | ||
2455 | ret = em_push(ctxt); | 2392 | ret = em_push(ctxt); |
2456 | } | 2393 | } |
2457 | 2394 | ||
@@ -2462,18 +2399,16 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2462 | u16 tss_selector, int reason, | 2399 | u16 tss_selector, int reason, |
2463 | bool has_error_code, u32 error_code) | 2400 | bool has_error_code, u32 error_code) |
2464 | { | 2401 | { |
2465 | struct x86_emulate_ops *ops = ctxt->ops; | ||
2466 | struct decode_cache *c = &ctxt->decode; | ||
2467 | int rc; | 2402 | int rc; |
2468 | 2403 | ||
2469 | c->eip = ctxt->eip; | 2404 | ctxt->_eip = ctxt->eip; |
2470 | c->dst.type = OP_NONE; | 2405 | ctxt->dst.type = OP_NONE; |
2471 | 2406 | ||
2472 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2407 | rc = emulator_do_task_switch(ctxt, tss_selector, reason, |
2473 | has_error_code, error_code); | 2408 | has_error_code, error_code); |
2474 | 2409 | ||
2475 | if (rc == X86EMUL_CONTINUE) | 2410 | if (rc == X86EMUL_CONTINUE) |
2476 | ctxt->eip = c->eip; | 2411 | ctxt->eip = ctxt->_eip; |
2477 | 2412 | ||
2478 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 2413 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
2479 | } | 2414 | } |
@@ -2481,22 +2416,20 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2481 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, | 2416 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, |
2482 | int reg, struct operand *op) | 2417 | int reg, struct operand *op) |
2483 | { | 2418 | { |
2484 | struct decode_cache *c = &ctxt->decode; | ||
2485 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | 2419 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; |
2486 | 2420 | ||
2487 | register_address_increment(c, &c->regs[reg], df * op->bytes); | 2421 | register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes); |
2488 | op->addr.mem.ea = register_address(c, c->regs[reg]); | 2422 | op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]); |
2489 | op->addr.mem.seg = seg; | 2423 | op->addr.mem.seg = seg; |
2490 | } | 2424 | } |
2491 | 2425 | ||
2492 | static int em_das(struct x86_emulate_ctxt *ctxt) | 2426 | static int em_das(struct x86_emulate_ctxt *ctxt) |
2493 | { | 2427 | { |
2494 | struct decode_cache *c = &ctxt->decode; | ||
2495 | u8 al, old_al; | 2428 | u8 al, old_al; |
2496 | bool af, cf, old_cf; | 2429 | bool af, cf, old_cf; |
2497 | 2430 | ||
2498 | cf = ctxt->eflags & X86_EFLAGS_CF; | 2431 | cf = ctxt->eflags & X86_EFLAGS_CF; |
2499 | al = c->dst.val; | 2432 | al = ctxt->dst.val; |
2500 | 2433 | ||
2501 | old_al = al; | 2434 | old_al = al; |
2502 | old_cf = cf; | 2435 | old_cf = cf; |
@@ -2514,12 +2447,12 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2514 | cf = true; | 2447 | cf = true; |
2515 | } | 2448 | } |
2516 | 2449 | ||
2517 | c->dst.val = al; | 2450 | ctxt->dst.val = al; |
2518 | /* Set PF, ZF, SF */ | 2451 | /* Set PF, ZF, SF */ |
2519 | c->src.type = OP_IMM; | 2452 | ctxt->src.type = OP_IMM; |
2520 | c->src.val = 0; | 2453 | ctxt->src.val = 0; |
2521 | c->src.bytes = 1; | 2454 | ctxt->src.bytes = 1; |
2522 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 2455 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); |
2523 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); | 2456 | ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); |
2524 | if (cf) | 2457 | if (cf) |
2525 | ctxt->eflags |= X86_EFLAGS_CF; | 2458 | ctxt->eflags |= X86_EFLAGS_CF; |
@@ -2530,175 +2463,189 @@ static int em_das(struct x86_emulate_ctxt *ctxt) | |||
2530 | 2463 | ||
2531 | static int em_call_far(struct x86_emulate_ctxt *ctxt) | 2464 | static int em_call_far(struct x86_emulate_ctxt *ctxt) |
2532 | { | 2465 | { |
2533 | struct decode_cache *c = &ctxt->decode; | ||
2534 | u16 sel, old_cs; | 2466 | u16 sel, old_cs; |
2535 | ulong old_eip; | 2467 | ulong old_eip; |
2536 | int rc; | 2468 | int rc; |
2537 | 2469 | ||
2538 | old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); | 2470 | old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); |
2539 | old_eip = c->eip; | 2471 | old_eip = ctxt->_eip; |
2540 | 2472 | ||
2541 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); | 2473 | memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); |
2542 | if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS)) | 2474 | if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS)) |
2543 | return X86EMUL_CONTINUE; | 2475 | return X86EMUL_CONTINUE; |
2544 | 2476 | ||
2545 | c->eip = 0; | 2477 | ctxt->_eip = 0; |
2546 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | 2478 | memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); |
2547 | 2479 | ||
2548 | c->src.val = old_cs; | 2480 | ctxt->src.val = old_cs; |
2549 | rc = em_push(ctxt); | 2481 | rc = em_push(ctxt); |
2550 | if (rc != X86EMUL_CONTINUE) | 2482 | if (rc != X86EMUL_CONTINUE) |
2551 | return rc; | 2483 | return rc; |
2552 | 2484 | ||
2553 | c->src.val = old_eip; | 2485 | ctxt->src.val = old_eip; |
2554 | return em_push(ctxt); | 2486 | return em_push(ctxt); |
2555 | } | 2487 | } |
2556 | 2488 | ||
2557 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) | 2489 | static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) |
2558 | { | 2490 | { |
2559 | struct decode_cache *c = &ctxt->decode; | ||
2560 | int rc; | 2491 | int rc; |
2561 | 2492 | ||
2562 | c->dst.type = OP_REG; | 2493 | ctxt->dst.type = OP_REG; |
2563 | c->dst.addr.reg = &c->eip; | 2494 | ctxt->dst.addr.reg = &ctxt->_eip; |
2564 | c->dst.bytes = c->op_bytes; | 2495 | ctxt->dst.bytes = ctxt->op_bytes; |
2565 | rc = emulate_pop(ctxt, &c->dst.val, c->op_bytes); | 2496 | rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); |
2566 | if (rc != X86EMUL_CONTINUE) | 2497 | if (rc != X86EMUL_CONTINUE) |
2567 | return rc; | 2498 | return rc; |
2568 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val); | 2499 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val); |
2569 | return X86EMUL_CONTINUE; | 2500 | return X86EMUL_CONTINUE; |
2570 | } | 2501 | } |
2571 | 2502 | ||
2572 | static int em_add(struct x86_emulate_ctxt *ctxt) | 2503 | static int em_add(struct x86_emulate_ctxt *ctxt) |
2573 | { | 2504 | { |
2574 | struct decode_cache *c = &ctxt->decode; | 2505 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); |
2575 | |||
2576 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | ||
2577 | return X86EMUL_CONTINUE; | 2506 | return X86EMUL_CONTINUE; |
2578 | } | 2507 | } |
2579 | 2508 | ||
2580 | static int em_or(struct x86_emulate_ctxt *ctxt) | 2509 | static int em_or(struct x86_emulate_ctxt *ctxt) |
2581 | { | 2510 | { |
2582 | struct decode_cache *c = &ctxt->decode; | 2511 | emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags); |
2583 | |||
2584 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | ||
2585 | return X86EMUL_CONTINUE; | 2512 | return X86EMUL_CONTINUE; |
2586 | } | 2513 | } |
2587 | 2514 | ||
2588 | static int em_adc(struct x86_emulate_ctxt *ctxt) | 2515 | static int em_adc(struct x86_emulate_ctxt *ctxt) |
2589 | { | 2516 | { |
2590 | struct decode_cache *c = &ctxt->decode; | 2517 | emulate_2op_SrcV("adc", ctxt->src, ctxt->dst, ctxt->eflags); |
2591 | |||
2592 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | ||
2593 | return X86EMUL_CONTINUE; | 2518 | return X86EMUL_CONTINUE; |
2594 | } | 2519 | } |
2595 | 2520 | ||
2596 | static int em_sbb(struct x86_emulate_ctxt *ctxt) | 2521 | static int em_sbb(struct x86_emulate_ctxt *ctxt) |
2597 | { | 2522 | { |
2598 | struct decode_cache *c = &ctxt->decode; | 2523 | emulate_2op_SrcV("sbb", ctxt->src, ctxt->dst, ctxt->eflags); |
2599 | |||
2600 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | ||
2601 | return X86EMUL_CONTINUE; | 2524 | return X86EMUL_CONTINUE; |
2602 | } | 2525 | } |
2603 | 2526 | ||
2604 | static int em_and(struct x86_emulate_ctxt *ctxt) | 2527 | static int em_and(struct x86_emulate_ctxt *ctxt) |
2605 | { | 2528 | { |
2606 | struct decode_cache *c = &ctxt->decode; | 2529 | emulate_2op_SrcV("and", ctxt->src, ctxt->dst, ctxt->eflags); |
2607 | |||
2608 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | ||
2609 | return X86EMUL_CONTINUE; | 2530 | return X86EMUL_CONTINUE; |
2610 | } | 2531 | } |
2611 | 2532 | ||
2612 | static int em_sub(struct x86_emulate_ctxt *ctxt) | 2533 | static int em_sub(struct x86_emulate_ctxt *ctxt) |
2613 | { | 2534 | { |
2614 | struct decode_cache *c = &ctxt->decode; | 2535 | emulate_2op_SrcV("sub", ctxt->src, ctxt->dst, ctxt->eflags); |
2615 | |||
2616 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | ||
2617 | return X86EMUL_CONTINUE; | 2536 | return X86EMUL_CONTINUE; |
2618 | } | 2537 | } |
2619 | 2538 | ||
2620 | static int em_xor(struct x86_emulate_ctxt *ctxt) | 2539 | static int em_xor(struct x86_emulate_ctxt *ctxt) |
2621 | { | 2540 | { |
2622 | struct decode_cache *c = &ctxt->decode; | 2541 | emulate_2op_SrcV("xor", ctxt->src, ctxt->dst, ctxt->eflags); |
2623 | |||
2624 | emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags); | ||
2625 | return X86EMUL_CONTINUE; | 2542 | return X86EMUL_CONTINUE; |
2626 | } | 2543 | } |
2627 | 2544 | ||
2628 | static int em_cmp(struct x86_emulate_ctxt *ctxt) | 2545 | static int em_cmp(struct x86_emulate_ctxt *ctxt) |
2629 | { | 2546 | { |
2630 | struct decode_cache *c = &ctxt->decode; | 2547 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); |
2631 | |||
2632 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2633 | /* Disable writeback. */ | 2548 | /* Disable writeback. */ |
2634 | c->dst.type = OP_NONE; | 2549 | ctxt->dst.type = OP_NONE; |
2635 | return X86EMUL_CONTINUE; | 2550 | return X86EMUL_CONTINUE; |
2636 | } | 2551 | } |
2637 | 2552 | ||
2638 | static int em_imul(struct x86_emulate_ctxt *ctxt) | 2553 | static int em_test(struct x86_emulate_ctxt *ctxt) |
2554 | { | ||
2555 | emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags); | ||
2556 | return X86EMUL_CONTINUE; | ||
2557 | } | ||
2558 | |||
2559 | static int em_xchg(struct x86_emulate_ctxt *ctxt) | ||
2639 | { | 2560 | { |
2640 | struct decode_cache *c = &ctxt->decode; | 2561 | /* Write back the register source. */ |
2562 | ctxt->src.val = ctxt->dst.val; | ||
2563 | write_register_operand(&ctxt->src); | ||
2641 | 2564 | ||
2642 | emulate_2op_SrcV_nobyte("imul", c->src, c->dst, ctxt->eflags); | 2565 | /* Write back the memory destination with implicit LOCK prefix. */ |
2566 | ctxt->dst.val = ctxt->src.orig_val; | ||
2567 | ctxt->lock_prefix = 1; | ||
2643 | return X86EMUL_CONTINUE; | 2568 | return X86EMUL_CONTINUE; |
2644 | } | 2569 | } |
2645 | 2570 | ||
2646 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) | 2571 | static int em_imul(struct x86_emulate_ctxt *ctxt) |
2647 | { | 2572 | { |
2648 | struct decode_cache *c = &ctxt->decode; | 2573 | emulate_2op_SrcV_nobyte("imul", ctxt->src, ctxt->dst, ctxt->eflags); |
2574 | return X86EMUL_CONTINUE; | ||
2575 | } | ||
2649 | 2576 | ||
2650 | c->dst.val = c->src2.val; | 2577 | static int em_imul_3op(struct x86_emulate_ctxt *ctxt) |
2578 | { | ||
2579 | ctxt->dst.val = ctxt->src2.val; | ||
2651 | return em_imul(ctxt); | 2580 | return em_imul(ctxt); |
2652 | } | 2581 | } |
2653 | 2582 | ||
2654 | static int em_cwd(struct x86_emulate_ctxt *ctxt) | 2583 | static int em_cwd(struct x86_emulate_ctxt *ctxt) |
2655 | { | 2584 | { |
2656 | struct decode_cache *c = &ctxt->decode; | 2585 | ctxt->dst.type = OP_REG; |
2657 | 2586 | ctxt->dst.bytes = ctxt->src.bytes; | |
2658 | c->dst.type = OP_REG; | 2587 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; |
2659 | c->dst.bytes = c->src.bytes; | 2588 | ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); |
2660 | c->dst.addr.reg = &c->regs[VCPU_REGS_RDX]; | ||
2661 | c->dst.val = ~((c->src.val >> (c->src.bytes * 8 - 1)) - 1); | ||
2662 | 2589 | ||
2663 | return X86EMUL_CONTINUE; | 2590 | return X86EMUL_CONTINUE; |
2664 | } | 2591 | } |
2665 | 2592 | ||
2666 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) | 2593 | static int em_rdtsc(struct x86_emulate_ctxt *ctxt) |
2667 | { | 2594 | { |
2668 | struct decode_cache *c = &ctxt->decode; | ||
2669 | u64 tsc = 0; | 2595 | u64 tsc = 0; |
2670 | 2596 | ||
2671 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); | 2597 | ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); |
2672 | c->regs[VCPU_REGS_RAX] = (u32)tsc; | 2598 | ctxt->regs[VCPU_REGS_RAX] = (u32)tsc; |
2673 | c->regs[VCPU_REGS_RDX] = tsc >> 32; | 2599 | ctxt->regs[VCPU_REGS_RDX] = tsc >> 32; |
2674 | return X86EMUL_CONTINUE; | 2600 | return X86EMUL_CONTINUE; |
2675 | } | 2601 | } |
2676 | 2602 | ||
2677 | static int em_mov(struct x86_emulate_ctxt *ctxt) | 2603 | static int em_mov(struct x86_emulate_ctxt *ctxt) |
2678 | { | 2604 | { |
2679 | struct decode_cache *c = &ctxt->decode; | 2605 | ctxt->dst.val = ctxt->src.val; |
2680 | c->dst.val = c->src.val; | ||
2681 | return X86EMUL_CONTINUE; | 2606 | return X86EMUL_CONTINUE; |
2682 | } | 2607 | } |
2683 | 2608 | ||
2609 | static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) | ||
2610 | { | ||
2611 | if (ctxt->modrm_reg > VCPU_SREG_GS) | ||
2612 | return emulate_ud(ctxt); | ||
2613 | |||
2614 | ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); | ||
2615 | return X86EMUL_CONTINUE; | ||
2616 | } | ||
2617 | |||
2618 | static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) | ||
2619 | { | ||
2620 | u16 sel = ctxt->src.val; | ||
2621 | |||
2622 | if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS) | ||
2623 | return emulate_ud(ctxt); | ||
2624 | |||
2625 | if (ctxt->modrm_reg == VCPU_SREG_SS) | ||
2626 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; | ||
2627 | |||
2628 | /* Disable writeback. */ | ||
2629 | ctxt->dst.type = OP_NONE; | ||
2630 | return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); | ||
2631 | } | ||
2632 | |||
2684 | static int em_movdqu(struct x86_emulate_ctxt *ctxt) | 2633 | static int em_movdqu(struct x86_emulate_ctxt *ctxt) |
2685 | { | 2634 | { |
2686 | struct decode_cache *c = &ctxt->decode; | 2635 | memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes); |
2687 | memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes); | ||
2688 | return X86EMUL_CONTINUE; | 2636 | return X86EMUL_CONTINUE; |
2689 | } | 2637 | } |
2690 | 2638 | ||
2691 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) | 2639 | static int em_invlpg(struct x86_emulate_ctxt *ctxt) |
2692 | { | 2640 | { |
2693 | struct decode_cache *c = &ctxt->decode; | ||
2694 | int rc; | 2641 | int rc; |
2695 | ulong linear; | 2642 | ulong linear; |
2696 | 2643 | ||
2697 | rc = linearize(ctxt, c->src.addr.mem, 1, false, &linear); | 2644 | rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear); |
2698 | if (rc == X86EMUL_CONTINUE) | 2645 | if (rc == X86EMUL_CONTINUE) |
2699 | ctxt->ops->invlpg(ctxt, linear); | 2646 | ctxt->ops->invlpg(ctxt, linear); |
2700 | /* Disable writeback. */ | 2647 | /* Disable writeback. */ |
2701 | c->dst.type = OP_NONE; | 2648 | ctxt->dst.type = OP_NONE; |
2702 | return X86EMUL_CONTINUE; | 2649 | return X86EMUL_CONTINUE; |
2703 | } | 2650 | } |
2704 | 2651 | ||
@@ -2714,10 +2661,9 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) | |||
2714 | 2661 | ||
2715 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) | 2662 | static int em_vmcall(struct x86_emulate_ctxt *ctxt) |
2716 | { | 2663 | { |
2717 | struct decode_cache *c = &ctxt->decode; | ||
2718 | int rc; | 2664 | int rc; |
2719 | 2665 | ||
2720 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | 2666 | if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1) |
2721 | return X86EMUL_UNHANDLEABLE; | 2667 | return X86EMUL_UNHANDLEABLE; |
2722 | 2668 | ||
2723 | rc = ctxt->ops->fix_hypercall(ctxt); | 2669 | rc = ctxt->ops->fix_hypercall(ctxt); |
@@ -2725,73 +2671,104 @@ static int em_vmcall(struct x86_emulate_ctxt *ctxt) | |||
2725 | return rc; | 2671 | return rc; |
2726 | 2672 | ||
2727 | /* Let the processor re-execute the fixed hypercall */ | 2673 | /* Let the processor re-execute the fixed hypercall */ |
2728 | c->eip = ctxt->eip; | 2674 | ctxt->_eip = ctxt->eip; |
2729 | /* Disable writeback. */ | 2675 | /* Disable writeback. */ |
2730 | c->dst.type = OP_NONE; | 2676 | ctxt->dst.type = OP_NONE; |
2731 | return X86EMUL_CONTINUE; | 2677 | return X86EMUL_CONTINUE; |
2732 | } | 2678 | } |
2733 | 2679 | ||
2734 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) | 2680 | static int em_lgdt(struct x86_emulate_ctxt *ctxt) |
2735 | { | 2681 | { |
2736 | struct decode_cache *c = &ctxt->decode; | ||
2737 | struct desc_ptr desc_ptr; | 2682 | struct desc_ptr desc_ptr; |
2738 | int rc; | 2683 | int rc; |
2739 | 2684 | ||
2740 | rc = read_descriptor(ctxt, c->src.addr.mem, | 2685 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
2741 | &desc_ptr.size, &desc_ptr.address, | 2686 | &desc_ptr.size, &desc_ptr.address, |
2742 | c->op_bytes); | 2687 | ctxt->op_bytes); |
2743 | if (rc != X86EMUL_CONTINUE) | 2688 | if (rc != X86EMUL_CONTINUE) |
2744 | return rc; | 2689 | return rc; |
2745 | ctxt->ops->set_gdt(ctxt, &desc_ptr); | 2690 | ctxt->ops->set_gdt(ctxt, &desc_ptr); |
2746 | /* Disable writeback. */ | 2691 | /* Disable writeback. */ |
2747 | c->dst.type = OP_NONE; | 2692 | ctxt->dst.type = OP_NONE; |
2748 | return X86EMUL_CONTINUE; | 2693 | return X86EMUL_CONTINUE; |
2749 | } | 2694 | } |
2750 | 2695 | ||
2751 | static int em_vmmcall(struct x86_emulate_ctxt *ctxt) | 2696 | static int em_vmmcall(struct x86_emulate_ctxt *ctxt) |
2752 | { | 2697 | { |
2753 | struct decode_cache *c = &ctxt->decode; | ||
2754 | int rc; | 2698 | int rc; |
2755 | 2699 | ||
2756 | rc = ctxt->ops->fix_hypercall(ctxt); | 2700 | rc = ctxt->ops->fix_hypercall(ctxt); |
2757 | 2701 | ||
2758 | /* Disable writeback. */ | 2702 | /* Disable writeback. */ |
2759 | c->dst.type = OP_NONE; | 2703 | ctxt->dst.type = OP_NONE; |
2760 | return rc; | 2704 | return rc; |
2761 | } | 2705 | } |
2762 | 2706 | ||
2763 | static int em_lidt(struct x86_emulate_ctxt *ctxt) | 2707 | static int em_lidt(struct x86_emulate_ctxt *ctxt) |
2764 | { | 2708 | { |
2765 | struct decode_cache *c = &ctxt->decode; | ||
2766 | struct desc_ptr desc_ptr; | 2709 | struct desc_ptr desc_ptr; |
2767 | int rc; | 2710 | int rc; |
2768 | 2711 | ||
2769 | rc = read_descriptor(ctxt, c->src.addr.mem, | 2712 | rc = read_descriptor(ctxt, ctxt->src.addr.mem, |
2770 | &desc_ptr.size, &desc_ptr.address, | 2713 | &desc_ptr.size, &desc_ptr.address, |
2771 | c->op_bytes); | 2714 | ctxt->op_bytes); |
2772 | if (rc != X86EMUL_CONTINUE) | 2715 | if (rc != X86EMUL_CONTINUE) |
2773 | return rc; | 2716 | return rc; |
2774 | ctxt->ops->set_idt(ctxt, &desc_ptr); | 2717 | ctxt->ops->set_idt(ctxt, &desc_ptr); |
2775 | /* Disable writeback. */ | 2718 | /* Disable writeback. */ |
2776 | c->dst.type = OP_NONE; | 2719 | ctxt->dst.type = OP_NONE; |
2777 | return X86EMUL_CONTINUE; | 2720 | return X86EMUL_CONTINUE; |
2778 | } | 2721 | } |
2779 | 2722 | ||
2780 | static int em_smsw(struct x86_emulate_ctxt *ctxt) | 2723 | static int em_smsw(struct x86_emulate_ctxt *ctxt) |
2781 | { | 2724 | { |
2782 | struct decode_cache *c = &ctxt->decode; | 2725 | ctxt->dst.bytes = 2; |
2783 | 2726 | ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); | |
2784 | c->dst.bytes = 2; | ||
2785 | c->dst.val = ctxt->ops->get_cr(ctxt, 0); | ||
2786 | return X86EMUL_CONTINUE; | 2727 | return X86EMUL_CONTINUE; |
2787 | } | 2728 | } |
2788 | 2729 | ||
2789 | static int em_lmsw(struct x86_emulate_ctxt *ctxt) | 2730 | static int em_lmsw(struct x86_emulate_ctxt *ctxt) |
2790 | { | 2731 | { |
2791 | struct decode_cache *c = &ctxt->decode; | ||
2792 | ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) | 2732 | ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul) |
2793 | | (c->src.val & 0x0f)); | 2733 | | (ctxt->src.val & 0x0f)); |
2794 | c->dst.type = OP_NONE; | 2734 | ctxt->dst.type = OP_NONE; |
2735 | return X86EMUL_CONTINUE; | ||
2736 | } | ||
2737 | |||
2738 | static int em_loop(struct x86_emulate_ctxt *ctxt) | ||
2739 | { | ||
2740 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); | ||
2741 | if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) && | ||
2742 | (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) | ||
2743 | jmp_rel(ctxt, ctxt->src.val); | ||
2744 | |||
2745 | return X86EMUL_CONTINUE; | ||
2746 | } | ||
2747 | |||
2748 | static int em_jcxz(struct x86_emulate_ctxt *ctxt) | ||
2749 | { | ||
2750 | if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) | ||
2751 | jmp_rel(ctxt, ctxt->src.val); | ||
2752 | |||
2753 | return X86EMUL_CONTINUE; | ||
2754 | } | ||
2755 | |||
2756 | static int em_cli(struct x86_emulate_ctxt *ctxt) | ||
2757 | { | ||
2758 | if (emulator_bad_iopl(ctxt)) | ||
2759 | return emulate_gp(ctxt, 0); | ||
2760 | |||
2761 | ctxt->eflags &= ~X86_EFLAGS_IF; | ||
2762 | return X86EMUL_CONTINUE; | ||
2763 | } | ||
2764 | |||
2765 | static int em_sti(struct x86_emulate_ctxt *ctxt) | ||
2766 | { | ||
2767 | if (emulator_bad_iopl(ctxt)) | ||
2768 | return emulate_gp(ctxt, 0); | ||
2769 | |||
2770 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | ||
2771 | ctxt->eflags |= X86_EFLAGS_IF; | ||
2795 | return X86EMUL_CONTINUE; | 2772 | return X86EMUL_CONTINUE; |
2796 | } | 2773 | } |
2797 | 2774 | ||
@@ -2809,9 +2786,7 @@ static bool valid_cr(int nr) | |||
2809 | 2786 | ||
2810 | static int check_cr_read(struct x86_emulate_ctxt *ctxt) | 2787 | static int check_cr_read(struct x86_emulate_ctxt *ctxt) |
2811 | { | 2788 | { |
2812 | struct decode_cache *c = &ctxt->decode; | 2789 | if (!valid_cr(ctxt->modrm_reg)) |
2813 | |||
2814 | if (!valid_cr(c->modrm_reg)) | ||
2815 | return emulate_ud(ctxt); | 2790 | return emulate_ud(ctxt); |
2816 | 2791 | ||
2817 | return X86EMUL_CONTINUE; | 2792 | return X86EMUL_CONTINUE; |
@@ -2819,9 +2794,8 @@ static int check_cr_read(struct x86_emulate_ctxt *ctxt) | |||
2819 | 2794 | ||
2820 | static int check_cr_write(struct x86_emulate_ctxt *ctxt) | 2795 | static int check_cr_write(struct x86_emulate_ctxt *ctxt) |
2821 | { | 2796 | { |
2822 | struct decode_cache *c = &ctxt->decode; | 2797 | u64 new_val = ctxt->src.val64; |
2823 | u64 new_val = c->src.val64; | 2798 | int cr = ctxt->modrm_reg; |
2824 | int cr = c->modrm_reg; | ||
2825 | u64 efer = 0; | 2799 | u64 efer = 0; |
2826 | 2800 | ||
2827 | static u64 cr_reserved_bits[] = { | 2801 | static u64 cr_reserved_bits[] = { |
@@ -2898,8 +2872,7 @@ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt) | |||
2898 | 2872 | ||
2899 | static int check_dr_read(struct x86_emulate_ctxt *ctxt) | 2873 | static int check_dr_read(struct x86_emulate_ctxt *ctxt) |
2900 | { | 2874 | { |
2901 | struct decode_cache *c = &ctxt->decode; | 2875 | int dr = ctxt->modrm_reg; |
2902 | int dr = c->modrm_reg; | ||
2903 | u64 cr4; | 2876 | u64 cr4; |
2904 | 2877 | ||
2905 | if (dr > 7) | 2878 | if (dr > 7) |
@@ -2917,9 +2890,8 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) | |||
2917 | 2890 | ||
2918 | static int check_dr_write(struct x86_emulate_ctxt *ctxt) | 2891 | static int check_dr_write(struct x86_emulate_ctxt *ctxt) |
2919 | { | 2892 | { |
2920 | struct decode_cache *c = &ctxt->decode; | 2893 | u64 new_val = ctxt->src.val64; |
2921 | u64 new_val = c->src.val64; | 2894 | int dr = ctxt->modrm_reg; |
2922 | int dr = c->modrm_reg; | ||
2923 | 2895 | ||
2924 | if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) | 2896 | if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL)) |
2925 | return emulate_gp(ctxt, 0); | 2897 | return emulate_gp(ctxt, 0); |
@@ -2941,7 +2913,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) | |||
2941 | 2913 | ||
2942 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) | 2914 | static int check_svme_pa(struct x86_emulate_ctxt *ctxt) |
2943 | { | 2915 | { |
2944 | u64 rax = ctxt->decode.regs[VCPU_REGS_RAX]; | 2916 | u64 rax = ctxt->regs[VCPU_REGS_RAX]; |
2945 | 2917 | ||
2946 | /* Valid physical address? */ | 2918 | /* Valid physical address? */ |
2947 | if (rax & 0xffff000000000000ULL) | 2919 | if (rax & 0xffff000000000000ULL) |
@@ -2963,7 +2935,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) | |||
2963 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | 2935 | static int check_rdpmc(struct x86_emulate_ctxt *ctxt) |
2964 | { | 2936 | { |
2965 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | 2937 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); |
2966 | u64 rcx = ctxt->decode.regs[VCPU_REGS_RCX]; | 2938 | u64 rcx = ctxt->regs[VCPU_REGS_RCX]; |
2967 | 2939 | ||
2968 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || | 2940 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || |
2969 | (rcx > 3)) | 2941 | (rcx > 3)) |
@@ -2974,10 +2946,8 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | |||
2974 | 2946 | ||
2975 | static int check_perm_in(struct x86_emulate_ctxt *ctxt) | 2947 | static int check_perm_in(struct x86_emulate_ctxt *ctxt) |
2976 | { | 2948 | { |
2977 | struct decode_cache *c = &ctxt->decode; | 2949 | ctxt->dst.bytes = min(ctxt->dst.bytes, 4u); |
2978 | 2950 | if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes)) | |
2979 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2980 | if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) | ||
2981 | return emulate_gp(ctxt, 0); | 2951 | return emulate_gp(ctxt, 0); |
2982 | 2952 | ||
2983 | return X86EMUL_CONTINUE; | 2953 | return X86EMUL_CONTINUE; |
@@ -2985,10 +2955,8 @@ static int check_perm_in(struct x86_emulate_ctxt *ctxt) | |||
2985 | 2955 | ||
2986 | static int check_perm_out(struct x86_emulate_ctxt *ctxt) | 2956 | static int check_perm_out(struct x86_emulate_ctxt *ctxt) |
2987 | { | 2957 | { |
2988 | struct decode_cache *c = &ctxt->decode; | 2958 | ctxt->src.bytes = min(ctxt->src.bytes, 4u); |
2989 | 2959 | if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes)) | |
2990 | c->src.bytes = min(c->src.bytes, 4u); | ||
2991 | if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) | ||
2992 | return emulate_gp(ctxt, 0); | 2960 | return emulate_gp(ctxt, 0); |
2993 | 2961 | ||
2994 | return X86EMUL_CONTINUE; | 2962 | return X86EMUL_CONTINUE; |
@@ -3165,12 +3133,15 @@ static struct opcode opcode_table[256] = { | |||
3165 | G(DstMem | SrcImm | ModRM | Group, group1), | 3133 | G(DstMem | SrcImm | ModRM | Group, group1), |
3166 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), | 3134 | G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), |
3167 | G(DstMem | SrcImmByte | ModRM | Group, group1), | 3135 | G(DstMem | SrcImmByte | ModRM | Group, group1), |
3168 | D2bv(DstMem | SrcReg | ModRM), D2bv(DstMem | SrcReg | ModRM | Lock), | 3136 | I2bv(DstMem | SrcReg | ModRM, em_test), |
3137 | I2bv(DstMem | SrcReg | ModRM | Lock, em_xchg), | ||
3169 | /* 0x88 - 0x8F */ | 3138 | /* 0x88 - 0x8F */ |
3170 | I2bv(DstMem | SrcReg | ModRM | Mov, em_mov), | 3139 | I2bv(DstMem | SrcReg | ModRM | Mov, em_mov), |
3171 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), | 3140 | I2bv(DstReg | SrcMem | ModRM | Mov, em_mov), |
3172 | D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg), | 3141 | I(DstMem | SrcNone | ModRM | Mov, em_mov_rm_sreg), |
3173 | D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A), | 3142 | D(ModRM | SrcMem | NoAccess | DstReg), |
3143 | I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm), | ||
3144 | G(0, group1A), | ||
3174 | /* 0x90 - 0x97 */ | 3145 | /* 0x90 - 0x97 */ |
3175 | DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), | 3146 | DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)), |
3176 | /* 0x98 - 0x9F */ | 3147 | /* 0x98 - 0x9F */ |
@@ -3184,7 +3155,7 @@ static struct opcode opcode_table[256] = { | |||
3184 | I2bv(SrcSI | DstDI | Mov | String, em_mov), | 3155 | I2bv(SrcSI | DstDI | Mov | String, em_mov), |
3185 | I2bv(SrcSI | DstDI | String, em_cmp), | 3156 | I2bv(SrcSI | DstDI | String, em_cmp), |
3186 | /* 0xA8 - 0xAF */ | 3157 | /* 0xA8 - 0xAF */ |
3187 | D2bv(DstAcc | SrcImm), | 3158 | I2bv(DstAcc | SrcImm, em_test), |
3188 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), | 3159 | I2bv(SrcAcc | DstDI | Mov | String, em_mov), |
3189 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), | 3160 | I2bv(SrcSI | DstAcc | Mov | String, em_mov), |
3190 | I2bv(SrcAcc | DstDI | String, em_cmp), | 3161 | I2bv(SrcAcc | DstDI | String, em_cmp), |
@@ -3195,25 +3166,26 @@ static struct opcode opcode_table[256] = { | |||
3195 | /* 0xC0 - 0xC7 */ | 3166 | /* 0xC0 - 0xC7 */ |
3196 | D2bv(DstMem | SrcImmByte | ModRM), | 3167 | D2bv(DstMem | SrcImmByte | ModRM), |
3197 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), | 3168 | I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), |
3198 | D(ImplicitOps | Stack), | 3169 | I(ImplicitOps | Stack, em_ret), |
3199 | D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64), | 3170 | D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64), |
3200 | G(ByteOp, group11), G(0, group11), | 3171 | G(ByteOp, group11), G(0, group11), |
3201 | /* 0xC8 - 0xCF */ | 3172 | /* 0xC8 - 0xCF */ |
3202 | N, N, N, D(ImplicitOps | Stack), | 3173 | N, N, N, I(ImplicitOps | Stack, em_ret_far), |
3203 | D(ImplicitOps), DI(SrcImmByte, intn), | 3174 | D(ImplicitOps), DI(SrcImmByte, intn), |
3204 | D(ImplicitOps | No64), DI(ImplicitOps, iret), | 3175 | D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), |
3205 | /* 0xD0 - 0xD7 */ | 3176 | /* 0xD0 - 0xD7 */ |
3206 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), | 3177 | D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), |
3207 | N, N, N, N, | 3178 | N, N, N, N, |
3208 | /* 0xD8 - 0xDF */ | 3179 | /* 0xD8 - 0xDF */ |
3209 | N, N, N, N, N, N, N, N, | 3180 | N, N, N, N, N, N, N, N, |
3210 | /* 0xE0 - 0xE7 */ | 3181 | /* 0xE0 - 0xE7 */ |
3211 | X4(D(SrcImmByte)), | 3182 | X3(I(SrcImmByte, em_loop)), |
3183 | I(SrcImmByte, em_jcxz), | ||
3212 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), | 3184 | D2bvIP(SrcImmUByte | DstAcc, in, check_perm_in), |
3213 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), | 3185 | D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out), |
3214 | /* 0xE8 - 0xEF */ | 3186 | /* 0xE8 - 0xEF */ |
3215 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), | 3187 | D(SrcImm | Stack), D(SrcImm | ImplicitOps), |
3216 | D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps), | 3188 | I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps), |
3217 | D2bvIP(SrcDX | DstAcc, in, check_perm_in), | 3189 | D2bvIP(SrcDX | DstAcc, in, check_perm_in), |
3218 | D2bvIP(SrcAcc | DstDX, out, check_perm_out), | 3190 | D2bvIP(SrcAcc | DstDX, out, check_perm_out), |
3219 | /* 0xF0 - 0xF7 */ | 3191 | /* 0xF0 - 0xF7 */ |
@@ -3221,14 +3193,16 @@ static struct opcode opcode_table[256] = { | |||
3221 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), | 3193 | DI(ImplicitOps | Priv, hlt), D(ImplicitOps), |
3222 | G(ByteOp, group3), G(0, group3), | 3194 | G(ByteOp, group3), G(0, group3), |
3223 | /* 0xF8 - 0xFF */ | 3195 | /* 0xF8 - 0xFF */ |
3224 | D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), | 3196 | D(ImplicitOps), D(ImplicitOps), |
3197 | I(ImplicitOps, em_cli), I(ImplicitOps, em_sti), | ||
3225 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), | 3198 | D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), |
3226 | }; | 3199 | }; |
3227 | 3200 | ||
3228 | static struct opcode twobyte_table[256] = { | 3201 | static struct opcode twobyte_table[256] = { |
3229 | /* 0x00 - 0x0F */ | 3202 | /* 0x00 - 0x0F */ |
3230 | G(0, group6), GD(0, &group7), N, N, | 3203 | G(0, group6), GD(0, &group7), N, N, |
3231 | N, D(ImplicitOps | VendorSpecific), DI(ImplicitOps | Priv, clts), N, | 3204 | N, I(ImplicitOps | VendorSpecific, em_syscall), |
3205 | II(ImplicitOps | Priv, em_clts, clts), N, | ||
3232 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, | 3206 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, |
3233 | N, D(ImplicitOps | ModRM), N, N, | 3207 | N, D(ImplicitOps | ModRM), N, N, |
3234 | /* 0x10 - 0x1F */ | 3208 | /* 0x10 - 0x1F */ |
@@ -3245,7 +3219,8 @@ static struct opcode twobyte_table[256] = { | |||
3245 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), | 3219 | IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), |
3246 | DI(ImplicitOps | Priv, rdmsr), | 3220 | DI(ImplicitOps | Priv, rdmsr), |
3247 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), | 3221 | DIP(ImplicitOps | Priv, rdpmc, check_rdpmc), |
3248 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), | 3222 | I(ImplicitOps | VendorSpecific, em_sysenter), |
3223 | I(ImplicitOps | Priv | VendorSpecific, em_sysexit), | ||
3249 | N, N, | 3224 | N, N, |
3250 | N, N, N, N, N, N, N, N, | 3225 | N, N, N, N, N, N, N, N, |
3251 | /* 0x40 - 0x4F */ | 3226 | /* 0x40 - 0x4F */ |
@@ -3313,11 +3288,11 @@ static struct opcode twobyte_table[256] = { | |||
3313 | #undef I2bv | 3288 | #undef I2bv |
3314 | #undef I6ALU | 3289 | #undef I6ALU |
3315 | 3290 | ||
3316 | static unsigned imm_size(struct decode_cache *c) | 3291 | static unsigned imm_size(struct x86_emulate_ctxt *ctxt) |
3317 | { | 3292 | { |
3318 | unsigned size; | 3293 | unsigned size; |
3319 | 3294 | ||
3320 | size = (c->d & ByteOp) ? 1 : c->op_bytes; | 3295 | size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3321 | if (size == 8) | 3296 | if (size == 8) |
3322 | size = 4; | 3297 | size = 4; |
3323 | return size; | 3298 | return size; |
@@ -3326,23 +3301,21 @@ static unsigned imm_size(struct decode_cache *c) | |||
3326 | static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, | 3301 | static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, |
3327 | unsigned size, bool sign_extension) | 3302 | unsigned size, bool sign_extension) |
3328 | { | 3303 | { |
3329 | struct decode_cache *c = &ctxt->decode; | ||
3330 | struct x86_emulate_ops *ops = ctxt->ops; | ||
3331 | int rc = X86EMUL_CONTINUE; | 3304 | int rc = X86EMUL_CONTINUE; |
3332 | 3305 | ||
3333 | op->type = OP_IMM; | 3306 | op->type = OP_IMM; |
3334 | op->bytes = size; | 3307 | op->bytes = size; |
3335 | op->addr.mem.ea = c->eip; | 3308 | op->addr.mem.ea = ctxt->_eip; |
3336 | /* NB. Immediates are sign-extended as necessary. */ | 3309 | /* NB. Immediates are sign-extended as necessary. */ |
3337 | switch (op->bytes) { | 3310 | switch (op->bytes) { |
3338 | case 1: | 3311 | case 1: |
3339 | op->val = insn_fetch(s8, 1, c->eip); | 3312 | op->val = insn_fetch(s8, 1, ctxt->_eip); |
3340 | break; | 3313 | break; |
3341 | case 2: | 3314 | case 2: |
3342 | op->val = insn_fetch(s16, 2, c->eip); | 3315 | op->val = insn_fetch(s16, 2, ctxt->_eip); |
3343 | break; | 3316 | break; |
3344 | case 4: | 3317 | case 4: |
3345 | op->val = insn_fetch(s32, 4, c->eip); | 3318 | op->val = insn_fetch(s32, 4, ctxt->_eip); |
3346 | break; | 3319 | break; |
3347 | } | 3320 | } |
3348 | if (!sign_extension) { | 3321 | if (!sign_extension) { |
@@ -3362,11 +3335,8 @@ done: | |||
3362 | return rc; | 3335 | return rc; |
3363 | } | 3336 | } |
3364 | 3337 | ||
3365 | int | 3338 | int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) |
3366 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | ||
3367 | { | 3339 | { |
3368 | struct x86_emulate_ops *ops = ctxt->ops; | ||
3369 | struct decode_cache *c = &ctxt->decode; | ||
3370 | int rc = X86EMUL_CONTINUE; | 3340 | int rc = X86EMUL_CONTINUE; |
3371 | int mode = ctxt->mode; | 3341 | int mode = ctxt->mode; |
3372 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; | 3342 | int def_op_bytes, def_ad_bytes, goffset, simd_prefix; |
@@ -3374,11 +3344,11 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3374 | struct opcode opcode; | 3344 | struct opcode opcode; |
3375 | struct operand memop = { .type = OP_NONE }, *memopp = NULL; | 3345 | struct operand memop = { .type = OP_NONE }, *memopp = NULL; |
3376 | 3346 | ||
3377 | c->eip = ctxt->eip; | 3347 | ctxt->_eip = ctxt->eip; |
3378 | c->fetch.start = c->eip; | 3348 | ctxt->fetch.start = ctxt->_eip; |
3379 | c->fetch.end = c->fetch.start + insn_len; | 3349 | ctxt->fetch.end = ctxt->fetch.start + insn_len; |
3380 | if (insn_len > 0) | 3350 | if (insn_len > 0) |
3381 | memcpy(c->fetch.data, insn, insn_len); | 3351 | memcpy(ctxt->fetch.data, insn, insn_len); |
3382 | 3352 | ||
3383 | switch (mode) { | 3353 | switch (mode) { |
3384 | case X86EMUL_MODE_REAL: | 3354 | case X86EMUL_MODE_REAL: |
@@ -3399,46 +3369,46 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3399 | return -1; | 3369 | return -1; |
3400 | } | 3370 | } |
3401 | 3371 | ||
3402 | c->op_bytes = def_op_bytes; | 3372 | ctxt->op_bytes = def_op_bytes; |
3403 | c->ad_bytes = def_ad_bytes; | 3373 | ctxt->ad_bytes = def_ad_bytes; |
3404 | 3374 | ||
3405 | /* Legacy prefixes. */ | 3375 | /* Legacy prefixes. */ |
3406 | for (;;) { | 3376 | for (;;) { |
3407 | switch (c->b = insn_fetch(u8, 1, c->eip)) { | 3377 | switch (ctxt->b = insn_fetch(u8, 1, ctxt->_eip)) { |
3408 | case 0x66: /* operand-size override */ | 3378 | case 0x66: /* operand-size override */ |
3409 | op_prefix = true; | 3379 | op_prefix = true; |
3410 | /* switch between 2/4 bytes */ | 3380 | /* switch between 2/4 bytes */ |
3411 | c->op_bytes = def_op_bytes ^ 6; | 3381 | ctxt->op_bytes = def_op_bytes ^ 6; |
3412 | break; | 3382 | break; |
3413 | case 0x67: /* address-size override */ | 3383 | case 0x67: /* address-size override */ |
3414 | if (mode == X86EMUL_MODE_PROT64) | 3384 | if (mode == X86EMUL_MODE_PROT64) |
3415 | /* switch between 4/8 bytes */ | 3385 | /* switch between 4/8 bytes */ |
3416 | c->ad_bytes = def_ad_bytes ^ 12; | 3386 | ctxt->ad_bytes = def_ad_bytes ^ 12; |
3417 | else | 3387 | else |
3418 | /* switch between 2/4 bytes */ | 3388 | /* switch between 2/4 bytes */ |
3419 | c->ad_bytes = def_ad_bytes ^ 6; | 3389 | ctxt->ad_bytes = def_ad_bytes ^ 6; |
3420 | break; | 3390 | break; |
3421 | case 0x26: /* ES override */ | 3391 | case 0x26: /* ES override */ |
3422 | case 0x2e: /* CS override */ | 3392 | case 0x2e: /* CS override */ |
3423 | case 0x36: /* SS override */ | 3393 | case 0x36: /* SS override */ |
3424 | case 0x3e: /* DS override */ | 3394 | case 0x3e: /* DS override */ |
3425 | set_seg_override(c, (c->b >> 3) & 3); | 3395 | set_seg_override(ctxt, (ctxt->b >> 3) & 3); |
3426 | break; | 3396 | break; |
3427 | case 0x64: /* FS override */ | 3397 | case 0x64: /* FS override */ |
3428 | case 0x65: /* GS override */ | 3398 | case 0x65: /* GS override */ |
3429 | set_seg_override(c, c->b & 7); | 3399 | set_seg_override(ctxt, ctxt->b & 7); |
3430 | break; | 3400 | break; |
3431 | case 0x40 ... 0x4f: /* REX */ | 3401 | case 0x40 ... 0x4f: /* REX */ |
3432 | if (mode != X86EMUL_MODE_PROT64) | 3402 | if (mode != X86EMUL_MODE_PROT64) |
3433 | goto done_prefixes; | 3403 | goto done_prefixes; |
3434 | c->rex_prefix = c->b; | 3404 | ctxt->rex_prefix = ctxt->b; |
3435 | continue; | 3405 | continue; |
3436 | case 0xf0: /* LOCK */ | 3406 | case 0xf0: /* LOCK */ |
3437 | c->lock_prefix = 1; | 3407 | ctxt->lock_prefix = 1; |
3438 | break; | 3408 | break; |
3439 | case 0xf2: /* REPNE/REPNZ */ | 3409 | case 0xf2: /* REPNE/REPNZ */ |
3440 | case 0xf3: /* REP/REPE/REPZ */ | 3410 | case 0xf3: /* REP/REPE/REPZ */ |
3441 | c->rep_prefix = c->b; | 3411 | ctxt->rep_prefix = ctxt->b; |
3442 | break; | 3412 | break; |
3443 | default: | 3413 | default: |
3444 | goto done_prefixes; | 3414 | goto done_prefixes; |
@@ -3446,50 +3416,50 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) | |||
3446 | 3416 | ||
3447 | /* Any legacy prefix after a REX prefix nullifies its effect. */ | 3417 | /* Any legacy prefix after a REX prefix nullifies its effect. */ |
3448 | 3418 | ||
3449 | c->rex_prefix = 0; | 3419 | ctxt->rex_prefix = 0; |
3450 | } | 3420 | } |
3451 | 3421 | ||
3452 | done_prefixes: | 3422 | done_prefixes: |
3453 | 3423 | ||
3454 | /* REX prefix. */ | 3424 | /* REX prefix. */ |
3455 | if (c->rex_prefix & 8) | 3425 | if (ctxt->rex_prefix & 8) |
3456 | c->op_bytes = 8; /* REX.W */ | 3426 | ctxt->op_bytes = 8; /* REX.W */ |
3457 | 3427 | ||
3458 | /* Opcode byte(s). */ | 3428 | /* Opcode byte(s). */ |
3459 | opcode = opcode_table[c->b]; | 3429 | opcode = opcode_table[ctxt->b]; |
3460 | /* Two-byte opcode? */ | 3430 | /* Two-byte opcode? */ |
3461 | if (c->b == 0x0f) { | 3431 | if (ctxt->b == 0x0f) { |
3462 | c->twobyte = 1; | 3432 | ctxt->twobyte = 1; |
3463 | c->b = insn_fetch(u8, 1, c->eip); | 3433 | ctxt->b = insn_fetch(u8, 1, ctxt->_eip); |
3464 | opcode = twobyte_table[c->b]; | 3434 | opcode = twobyte_table[ctxt->b]; |
3465 | } | 3435 | } |
3466 | c->d = opcode.flags; | 3436 | ctxt->d = opcode.flags; |
3467 | 3437 | ||
3468 | while (c->d & GroupMask) { | 3438 | while (ctxt->d & GroupMask) { |
3469 | switch (c->d & GroupMask) { | 3439 | switch (ctxt->d & GroupMask) { |
3470 | case Group: | 3440 | case Group: |
3471 | c->modrm = insn_fetch(u8, 1, c->eip); | 3441 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); |
3472 | --c->eip; | 3442 | --ctxt->_eip; |
3473 | goffset = (c->modrm >> 3) & 7; | 3443 | goffset = (ctxt->modrm >> 3) & 7; |
3474 | opcode = opcode.u.group[goffset]; | 3444 | opcode = opcode.u.group[goffset]; |
3475 | break; | 3445 | break; |
3476 | case GroupDual: | 3446 | case GroupDual: |
3477 | c->modrm = insn_fetch(u8, 1, c->eip); | 3447 | ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip); |
3478 | --c->eip; | 3448 | --ctxt->_eip; |
3479 | goffset = (c->modrm >> 3) & 7; | 3449 | goffset = (ctxt->modrm >> 3) & 7; |
3480 | if ((c->modrm >> 6) == 3) | 3450 | if ((ctxt->modrm >> 6) == 3) |
3481 | opcode = opcode.u.gdual->mod3[goffset]; | 3451 | opcode = opcode.u.gdual->mod3[goffset]; |
3482 | else | 3452 | else |
3483 | opcode = opcode.u.gdual->mod012[goffset]; | 3453 | opcode = opcode.u.gdual->mod012[goffset]; |
3484 | break; | 3454 | break; |
3485 | case RMExt: | 3455 | case RMExt: |
3486 | goffset = c->modrm & 7; | 3456 | goffset = ctxt->modrm & 7; |
3487 | opcode = opcode.u.group[goffset]; | 3457 | opcode = opcode.u.group[goffset]; |
3488 | break; | 3458 | break; |
3489 | case Prefix: | 3459 | case Prefix: |
3490 | if (c->rep_prefix && op_prefix) | 3460 | if (ctxt->rep_prefix && op_prefix) |
3491 | return X86EMUL_UNHANDLEABLE; | 3461 | return X86EMUL_UNHANDLEABLE; |
3492 | simd_prefix = op_prefix ? 0x66 : c->rep_prefix; | 3462 | simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix; |
3493 | switch (simd_prefix) { | 3463 | switch (simd_prefix) { |
3494 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; | 3464 | case 0x00: opcode = opcode.u.gprefix->pfx_no; break; |
3495 | case 0x66: opcode = opcode.u.gprefix->pfx_66; break; | 3465 | case 0x66: opcode = opcode.u.gprefix->pfx_66; break; |
@@ -3501,61 +3471,61 @@ done_prefixes: | |||
3501 | return X86EMUL_UNHANDLEABLE; | 3471 | return X86EMUL_UNHANDLEABLE; |
3502 | } | 3472 | } |
3503 | 3473 | ||
3504 | c->d &= ~GroupMask; | 3474 | ctxt->d &= ~GroupMask; |
3505 | c->d |= opcode.flags; | 3475 | ctxt->d |= opcode.flags; |
3506 | } | 3476 | } |
3507 | 3477 | ||
3508 | c->execute = opcode.u.execute; | 3478 | ctxt->execute = opcode.u.execute; |
3509 | c->check_perm = opcode.check_perm; | 3479 | ctxt->check_perm = opcode.check_perm; |
3510 | c->intercept = opcode.intercept; | 3480 | ctxt->intercept = opcode.intercept; |
3511 | 3481 | ||
3512 | /* Unrecognised? */ | 3482 | /* Unrecognised? */ |
3513 | if (c->d == 0 || (c->d & Undefined)) | 3483 | if (ctxt->d == 0 || (ctxt->d & Undefined)) |
3514 | return -1; | 3484 | return -1; |
3515 | 3485 | ||
3516 | if (!(c->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | 3486 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) |
3517 | return -1; | 3487 | return -1; |
3518 | 3488 | ||
3519 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 3489 | if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack)) |
3520 | c->op_bytes = 8; | 3490 | ctxt->op_bytes = 8; |
3521 | 3491 | ||
3522 | if (c->d & Op3264) { | 3492 | if (ctxt->d & Op3264) { |
3523 | if (mode == X86EMUL_MODE_PROT64) | 3493 | if (mode == X86EMUL_MODE_PROT64) |
3524 | c->op_bytes = 8; | 3494 | ctxt->op_bytes = 8; |
3525 | else | 3495 | else |
3526 | c->op_bytes = 4; | 3496 | ctxt->op_bytes = 4; |
3527 | } | 3497 | } |
3528 | 3498 | ||
3529 | if (c->d & Sse) | 3499 | if (ctxt->d & Sse) |
3530 | c->op_bytes = 16; | 3500 | ctxt->op_bytes = 16; |
3531 | 3501 | ||
3532 | /* ModRM and SIB bytes. */ | 3502 | /* ModRM and SIB bytes. */ |
3533 | if (c->d & ModRM) { | 3503 | if (ctxt->d & ModRM) { |
3534 | rc = decode_modrm(ctxt, ops, &memop); | 3504 | rc = decode_modrm(ctxt, &memop); |
3535 | if (!c->has_seg_override) | 3505 | if (!ctxt->has_seg_override) |
3536 | set_seg_override(c, c->modrm_seg); | 3506 | set_seg_override(ctxt, ctxt->modrm_seg); |
3537 | } else if (c->d & MemAbs) | 3507 | } else if (ctxt->d & MemAbs) |
3538 | rc = decode_abs(ctxt, ops, &memop); | 3508 | rc = decode_abs(ctxt, &memop); |
3539 | if (rc != X86EMUL_CONTINUE) | 3509 | if (rc != X86EMUL_CONTINUE) |
3540 | goto done; | 3510 | goto done; |
3541 | 3511 | ||
3542 | if (!c->has_seg_override) | 3512 | if (!ctxt->has_seg_override) |
3543 | set_seg_override(c, VCPU_SREG_DS); | 3513 | set_seg_override(ctxt, VCPU_SREG_DS); |
3544 | 3514 | ||
3545 | memop.addr.mem.seg = seg_override(ctxt, c); | 3515 | memop.addr.mem.seg = seg_override(ctxt); |
3546 | 3516 | ||
3547 | if (memop.type == OP_MEM && c->ad_bytes != 8) | 3517 | if (memop.type == OP_MEM && ctxt->ad_bytes != 8) |
3548 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; | 3518 | memop.addr.mem.ea = (u32)memop.addr.mem.ea; |
3549 | 3519 | ||
3550 | /* | 3520 | /* |
3551 | * Decode and fetch the source operand: register, memory | 3521 | * Decode and fetch the source operand: register, memory |
3552 | * or immediate. | 3522 | * or immediate. |
3553 | */ | 3523 | */ |
3554 | switch (c->d & SrcMask) { | 3524 | switch (ctxt->d & SrcMask) { |
3555 | case SrcNone: | 3525 | case SrcNone: |
3556 | break; | 3526 | break; |
3557 | case SrcReg: | 3527 | case SrcReg: |
3558 | decode_register_operand(ctxt, &c->src, c, 0); | 3528 | decode_register_operand(ctxt, &ctxt->src, 0); |
3559 | break; | 3529 | break; |
3560 | case SrcMem16: | 3530 | case SrcMem16: |
3561 | memop.bytes = 2; | 3531 | memop.bytes = 2; |
@@ -3564,60 +3534,60 @@ done_prefixes: | |||
3564 | memop.bytes = 4; | 3534 | memop.bytes = 4; |
3565 | goto srcmem_common; | 3535 | goto srcmem_common; |
3566 | case SrcMem: | 3536 | case SrcMem: |
3567 | memop.bytes = (c->d & ByteOp) ? 1 : | 3537 | memop.bytes = (ctxt->d & ByteOp) ? 1 : |
3568 | c->op_bytes; | 3538 | ctxt->op_bytes; |
3569 | srcmem_common: | 3539 | srcmem_common: |
3570 | c->src = memop; | 3540 | ctxt->src = memop; |
3571 | memopp = &c->src; | 3541 | memopp = &ctxt->src; |
3572 | break; | 3542 | break; |
3573 | case SrcImmU16: | 3543 | case SrcImmU16: |
3574 | rc = decode_imm(ctxt, &c->src, 2, false); | 3544 | rc = decode_imm(ctxt, &ctxt->src, 2, false); |
3575 | break; | 3545 | break; |
3576 | case SrcImm: | 3546 | case SrcImm: |
3577 | rc = decode_imm(ctxt, &c->src, imm_size(c), true); | 3547 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), true); |
3578 | break; | 3548 | break; |
3579 | case SrcImmU: | 3549 | case SrcImmU: |
3580 | rc = decode_imm(ctxt, &c->src, imm_size(c), false); | 3550 | rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), false); |
3581 | break; | 3551 | break; |
3582 | case SrcImmByte: | 3552 | case SrcImmByte: |
3583 | rc = decode_imm(ctxt, &c->src, 1, true); | 3553 | rc = decode_imm(ctxt, &ctxt->src, 1, true); |
3584 | break; | 3554 | break; |
3585 | case SrcImmUByte: | 3555 | case SrcImmUByte: |
3586 | rc = decode_imm(ctxt, &c->src, 1, false); | 3556 | rc = decode_imm(ctxt, &ctxt->src, 1, false); |
3587 | break; | 3557 | break; |
3588 | case SrcAcc: | 3558 | case SrcAcc: |
3589 | c->src.type = OP_REG; | 3559 | ctxt->src.type = OP_REG; |
3590 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3560 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3591 | c->src.addr.reg = &c->regs[VCPU_REGS_RAX]; | 3561 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; |
3592 | fetch_register_operand(&c->src); | 3562 | fetch_register_operand(&ctxt->src); |
3593 | break; | 3563 | break; |
3594 | case SrcOne: | 3564 | case SrcOne: |
3595 | c->src.bytes = 1; | 3565 | ctxt->src.bytes = 1; |
3596 | c->src.val = 1; | 3566 | ctxt->src.val = 1; |
3597 | break; | 3567 | break; |
3598 | case SrcSI: | 3568 | case SrcSI: |
3599 | c->src.type = OP_MEM; | 3569 | ctxt->src.type = OP_MEM; |
3600 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3570 | ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3601 | c->src.addr.mem.ea = | 3571 | ctxt->src.addr.mem.ea = |
3602 | register_address(c, c->regs[VCPU_REGS_RSI]); | 3572 | register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); |
3603 | c->src.addr.mem.seg = seg_override(ctxt, c); | 3573 | ctxt->src.addr.mem.seg = seg_override(ctxt); |
3604 | c->src.val = 0; | 3574 | ctxt->src.val = 0; |
3605 | break; | 3575 | break; |
3606 | case SrcImmFAddr: | 3576 | case SrcImmFAddr: |
3607 | c->src.type = OP_IMM; | 3577 | ctxt->src.type = OP_IMM; |
3608 | c->src.addr.mem.ea = c->eip; | 3578 | ctxt->src.addr.mem.ea = ctxt->_eip; |
3609 | c->src.bytes = c->op_bytes + 2; | 3579 | ctxt->src.bytes = ctxt->op_bytes + 2; |
3610 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | 3580 | insn_fetch_arr(ctxt->src.valptr, ctxt->src.bytes, ctxt->_eip); |
3611 | break; | 3581 | break; |
3612 | case SrcMemFAddr: | 3582 | case SrcMemFAddr: |
3613 | memop.bytes = c->op_bytes + 2; | 3583 | memop.bytes = ctxt->op_bytes + 2; |
3614 | goto srcmem_common; | 3584 | goto srcmem_common; |
3615 | break; | 3585 | break; |
3616 | case SrcDX: | 3586 | case SrcDX: |
3617 | c->src.type = OP_REG; | 3587 | ctxt->src.type = OP_REG; |
3618 | c->src.bytes = 2; | 3588 | ctxt->src.bytes = 2; |
3619 | c->src.addr.reg = &c->regs[VCPU_REGS_RDX]; | 3589 | ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; |
3620 | fetch_register_operand(&c->src); | 3590 | fetch_register_operand(&ctxt->src); |
3621 | break; | 3591 | break; |
3622 | } | 3592 | } |
3623 | 3593 | ||
@@ -3628,22 +3598,22 @@ done_prefixes: | |||
3628 | * Decode and fetch the second source operand: register, memory | 3598 | * Decode and fetch the second source operand: register, memory |
3629 | * or immediate. | 3599 | * or immediate. |
3630 | */ | 3600 | */ |
3631 | switch (c->d & Src2Mask) { | 3601 | switch (ctxt->d & Src2Mask) { |
3632 | case Src2None: | 3602 | case Src2None: |
3633 | break; | 3603 | break; |
3634 | case Src2CL: | 3604 | case Src2CL: |
3635 | c->src2.bytes = 1; | 3605 | ctxt->src2.bytes = 1; |
3636 | c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; | 3606 | ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0x8; |
3637 | break; | 3607 | break; |
3638 | case Src2ImmByte: | 3608 | case Src2ImmByte: |
3639 | rc = decode_imm(ctxt, &c->src2, 1, true); | 3609 | rc = decode_imm(ctxt, &ctxt->src2, 1, true); |
3640 | break; | 3610 | break; |
3641 | case Src2One: | 3611 | case Src2One: |
3642 | c->src2.bytes = 1; | 3612 | ctxt->src2.bytes = 1; |
3643 | c->src2.val = 1; | 3613 | ctxt->src2.val = 1; |
3644 | break; | 3614 | break; |
3645 | case Src2Imm: | 3615 | case Src2Imm: |
3646 | rc = decode_imm(ctxt, &c->src2, imm_size(c), true); | 3616 | rc = decode_imm(ctxt, &ctxt->src2, imm_size(ctxt), true); |
3647 | break; | 3617 | break; |
3648 | } | 3618 | } |
3649 | 3619 | ||
@@ -3651,68 +3621,66 @@ done_prefixes: | |||
3651 | goto done; | 3621 | goto done; |
3652 | 3622 | ||
3653 | /* Decode and fetch the destination operand: register or memory. */ | 3623 | /* Decode and fetch the destination operand: register or memory. */ |
3654 | switch (c->d & DstMask) { | 3624 | switch (ctxt->d & DstMask) { |
3655 | case DstReg: | 3625 | case DstReg: |
3656 | decode_register_operand(ctxt, &c->dst, c, | 3626 | decode_register_operand(ctxt, &ctxt->dst, |
3657 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 3627 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); |
3658 | break; | 3628 | break; |
3659 | case DstImmUByte: | 3629 | case DstImmUByte: |
3660 | c->dst.type = OP_IMM; | 3630 | ctxt->dst.type = OP_IMM; |
3661 | c->dst.addr.mem.ea = c->eip; | 3631 | ctxt->dst.addr.mem.ea = ctxt->_eip; |
3662 | c->dst.bytes = 1; | 3632 | ctxt->dst.bytes = 1; |
3663 | c->dst.val = insn_fetch(u8, 1, c->eip); | 3633 | ctxt->dst.val = insn_fetch(u8, 1, ctxt->_eip); |
3664 | break; | 3634 | break; |
3665 | case DstMem: | 3635 | case DstMem: |
3666 | case DstMem64: | 3636 | case DstMem64: |
3667 | c->dst = memop; | 3637 | ctxt->dst = memop; |
3668 | memopp = &c->dst; | 3638 | memopp = &ctxt->dst; |
3669 | if ((c->d & DstMask) == DstMem64) | 3639 | if ((ctxt->d & DstMask) == DstMem64) |
3670 | c->dst.bytes = 8; | 3640 | ctxt->dst.bytes = 8; |
3671 | else | 3641 | else |
3672 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3642 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3673 | if (c->d & BitOp) | 3643 | if (ctxt->d & BitOp) |
3674 | fetch_bit_operand(c); | 3644 | fetch_bit_operand(ctxt); |
3675 | c->dst.orig_val = c->dst.val; | 3645 | ctxt->dst.orig_val = ctxt->dst.val; |
3676 | break; | 3646 | break; |
3677 | case DstAcc: | 3647 | case DstAcc: |
3678 | c->dst.type = OP_REG; | 3648 | ctxt->dst.type = OP_REG; |
3679 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3649 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3680 | c->dst.addr.reg = &c->regs[VCPU_REGS_RAX]; | 3650 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RAX]; |
3681 | fetch_register_operand(&c->dst); | 3651 | fetch_register_operand(&ctxt->dst); |
3682 | c->dst.orig_val = c->dst.val; | 3652 | ctxt->dst.orig_val = ctxt->dst.val; |
3683 | break; | 3653 | break; |
3684 | case DstDI: | 3654 | case DstDI: |
3685 | c->dst.type = OP_MEM; | 3655 | ctxt->dst.type = OP_MEM; |
3686 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 3656 | ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; |
3687 | c->dst.addr.mem.ea = | 3657 | ctxt->dst.addr.mem.ea = |
3688 | register_address(c, c->regs[VCPU_REGS_RDI]); | 3658 | register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); |
3689 | c->dst.addr.mem.seg = VCPU_SREG_ES; | 3659 | ctxt->dst.addr.mem.seg = VCPU_SREG_ES; |
3690 | c->dst.val = 0; | 3660 | ctxt->dst.val = 0; |
3691 | break; | 3661 | break; |
3692 | case DstDX: | 3662 | case DstDX: |
3693 | c->dst.type = OP_REG; | 3663 | ctxt->dst.type = OP_REG; |
3694 | c->dst.bytes = 2; | 3664 | ctxt->dst.bytes = 2; |
3695 | c->dst.addr.reg = &c->regs[VCPU_REGS_RDX]; | 3665 | ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; |
3696 | fetch_register_operand(&c->dst); | 3666 | fetch_register_operand(&ctxt->dst); |
3697 | break; | 3667 | break; |
3698 | case ImplicitOps: | 3668 | case ImplicitOps: |
3699 | /* Special instructions do their own operand decoding. */ | 3669 | /* Special instructions do their own operand decoding. */ |
3700 | default: | 3670 | default: |
3701 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3671 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
3702 | break; | 3672 | break; |
3703 | } | 3673 | } |
3704 | 3674 | ||
3705 | done: | 3675 | done: |
3706 | if (memopp && memopp->type == OP_MEM && c->rip_relative) | 3676 | if (memopp && memopp->type == OP_MEM && ctxt->rip_relative) |
3707 | memopp->addr.mem.ea += c->eip; | 3677 | memopp->addr.mem.ea += ctxt->_eip; |
3708 | 3678 | ||
3709 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 3679 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
3710 | } | 3680 | } |
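One detail of the prefix loop above that survives the renaming untouched is the XOR trick for the size overrides: 0x66 flips op_bytes between 2 and 4 via "^ 6", and 0x67 in long mode flips ad_bytes between 4 and 8 via "^ 12". A two-assert sanity check of that identity (plain C, not kernel code):

#include <assert.h>

int main(void)
{
	/* 0x66 operand-size override: 2 and 4 swap places under "^ 6". */
	assert((2 ^ 6) == 4 && (4 ^ 6) == 2);
	/* 0x67 address-size override in long mode: 4 and 8 swap under "^ 12". */
	assert((4 ^ 12) == 8 && (8 ^ 12) == 4);
	return 0;
}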
3711 | 3681 | ||
3712 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | 3682 | static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) |
3713 | { | 3683 | { |
3714 | struct decode_cache *c = &ctxt->decode; | ||
3715 | |||
3716 | /* The second termination condition only applies for REPE | 3684 | /* The second termination condition only applies for REPE |
3717 | * and REPNE. Test if the repeat string operation prefix is | 3685 | * and REPNE. Test if the repeat string operation prefix is |
3718 | * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the | 3686 | * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the |
@@ -3720,304 +3688,232 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) | |||
3720 | * - if REPE/REPZ and ZF = 0 then done | 3688 | * - if REPE/REPZ and ZF = 0 then done |
3721 | * - if REPNE/REPNZ and ZF = 1 then done | 3689 | * - if REPNE/REPNZ and ZF = 1 then done |
3722 | */ | 3690 | */ |
3723 | if (((c->b == 0xa6) || (c->b == 0xa7) || | 3691 | if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || |
3724 | (c->b == 0xae) || (c->b == 0xaf)) | 3692 | (ctxt->b == 0xae) || (ctxt->b == 0xaf)) |
3725 | && (((c->rep_prefix == REPE_PREFIX) && | 3693 | && (((ctxt->rep_prefix == REPE_PREFIX) && |
3726 | ((ctxt->eflags & EFLG_ZF) == 0)) | 3694 | ((ctxt->eflags & EFLG_ZF) == 0)) |
3727 | || ((c->rep_prefix == REPNE_PREFIX) && | 3695 | || ((ctxt->rep_prefix == REPNE_PREFIX) && |
3728 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) | 3696 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) |
3729 | return true; | 3697 | return true; |
3730 | 3698 | ||
3731 | return false; | 3699 | return false; |
3732 | } | 3700 | } |
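string_insn_completed() above covers only the second termination condition; the count check sits in x86_emulate_insn() below, under the "All REP prefixes have the same first termination condition" comment. Both rules restated as one self-contained model, with ad_mask standing in for the address-size mask and is_cmps_or_scas for the 0xa6/0xa7/0xae/0xaf test (illustrative only, not kernel code):

#include <stdbool.h>

#define EFLG_ZF (1u << 6)

enum rep_kind { REP_NONE, REPE_MODEL, REPNE_MODEL };

bool rep_string_done(unsigned long rcx, unsigned long ad_mask,
		     enum rep_kind rep, bool is_cmps_or_scas,
		     unsigned int eflags)
{
	if ((rcx & ad_mask) == 0)       /* first condition: count exhausted */
		return true;
	if (!is_cmps_or_scas)
		return false;
	if (rep == REPE_MODEL && !(eflags & EFLG_ZF))
		return true;            /* REPE/REPZ stops once ZF = 0 */
	if (rep == REPNE_MODEL && (eflags & EFLG_ZF))
		return true;            /* REPNE/REPNZ stops once ZF = 1 */
	return false;
}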
3733 | 3701 | ||
3734 | int | 3702 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) |
3735 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | ||
3736 | { | 3703 | { |
3737 | struct x86_emulate_ops *ops = ctxt->ops; | 3704 | struct x86_emulate_ops *ops = ctxt->ops; |
3738 | u64 msr_data; | 3705 | u64 msr_data; |
3739 | struct decode_cache *c = &ctxt->decode; | ||
3740 | int rc = X86EMUL_CONTINUE; | 3706 | int rc = X86EMUL_CONTINUE; |
3741 | int saved_dst_type = c->dst.type; | 3707 | int saved_dst_type = ctxt->dst.type; |
3742 | int irq; /* Used for int 3, int, and into */ | ||
3743 | 3708 | ||
3744 | ctxt->decode.mem_read.pos = 0; | 3709 | ctxt->mem_read.pos = 0; |
3745 | 3710 | ||
3746 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 3711 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { |
3747 | rc = emulate_ud(ctxt); | 3712 | rc = emulate_ud(ctxt); |
3748 | goto done; | 3713 | goto done; |
3749 | } | 3714 | } |
3750 | 3715 | ||
3751 | /* LOCK prefix is allowed only with some instructions */ | 3716 | /* LOCK prefix is allowed only with some instructions */ |
3752 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 3717 | if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) { |
3753 | rc = emulate_ud(ctxt); | 3718 | rc = emulate_ud(ctxt); |
3754 | goto done; | 3719 | goto done; |
3755 | } | 3720 | } |
3756 | 3721 | ||
3757 | if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) { | 3722 | if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) { |
3758 | rc = emulate_ud(ctxt); | 3723 | rc = emulate_ud(ctxt); |
3759 | goto done; | 3724 | goto done; |
3760 | } | 3725 | } |
3761 | 3726 | ||
3762 | if ((c->d & Sse) | 3727 | if ((ctxt->d & Sse) |
3763 | && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) | 3728 | && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) |
3764 | || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { | 3729 | || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { |
3765 | rc = emulate_ud(ctxt); | 3730 | rc = emulate_ud(ctxt); |
3766 | goto done; | 3731 | goto done; |
3767 | } | 3732 | } |
3768 | 3733 | ||
3769 | if ((c->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { | 3734 | if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { |
3770 | rc = emulate_nm(ctxt); | 3735 | rc = emulate_nm(ctxt); |
3771 | goto done; | 3736 | goto done; |
3772 | } | 3737 | } |
3773 | 3738 | ||
3774 | if (unlikely(ctxt->guest_mode) && c->intercept) { | 3739 | if (unlikely(ctxt->guest_mode) && ctxt->intercept) { |
3775 | rc = emulator_check_intercept(ctxt, c->intercept, | 3740 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
3776 | X86_ICPT_PRE_EXCEPT); | 3741 | X86_ICPT_PRE_EXCEPT); |
3777 | if (rc != X86EMUL_CONTINUE) | 3742 | if (rc != X86EMUL_CONTINUE) |
3778 | goto done; | 3743 | goto done; |
3779 | } | 3744 | } |
3780 | 3745 | ||
3781 | /* Privileged instruction can be executed only in CPL=0 */ | 3746 | /* Privileged instruction can be executed only in CPL=0 */ |
3782 | if ((c->d & Priv) && ops->cpl(ctxt)) { | 3747 | if ((ctxt->d & Priv) && ops->cpl(ctxt)) { |
3783 | rc = emulate_gp(ctxt, 0); | 3748 | rc = emulate_gp(ctxt, 0); |
3784 | goto done; | 3749 | goto done; |
3785 | } | 3750 | } |
3786 | 3751 | ||
3787 | /* Instruction can only be executed in protected mode */ | 3752 | /* Instruction can only be executed in protected mode */ |
3788 | if ((c->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { | 3753 | if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { |
3789 | rc = emulate_ud(ctxt); | 3754 | rc = emulate_ud(ctxt); |
3790 | goto done; | 3755 | goto done; |
3791 | } | 3756 | } |
3792 | 3757 | ||
3793 | /* Do instruction specific permission checks */ | 3758 | /* Do instruction specific permission checks */ |
3794 | if (c->check_perm) { | 3759 | if (ctxt->check_perm) { |
3795 | rc = c->check_perm(ctxt); | 3760 | rc = ctxt->check_perm(ctxt); |
3796 | if (rc != X86EMUL_CONTINUE) | 3761 | if (rc != X86EMUL_CONTINUE) |
3797 | goto done; | 3762 | goto done; |
3798 | } | 3763 | } |
3799 | 3764 | ||
3800 | if (unlikely(ctxt->guest_mode) && c->intercept) { | 3765 | if (unlikely(ctxt->guest_mode) && ctxt->intercept) { |
3801 | rc = emulator_check_intercept(ctxt, c->intercept, | 3766 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
3802 | X86_ICPT_POST_EXCEPT); | 3767 | X86_ICPT_POST_EXCEPT); |
3803 | if (rc != X86EMUL_CONTINUE) | 3768 | if (rc != X86EMUL_CONTINUE) |
3804 | goto done; | 3769 | goto done; |
3805 | } | 3770 | } |
3806 | 3771 | ||
3807 | if (c->rep_prefix && (c->d & String)) { | 3772 | if (ctxt->rep_prefix && (ctxt->d & String)) { |
3808 | /* All REP prefixes have the same first termination condition */ | 3773 | /* All REP prefixes have the same first termination condition */ |
3809 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 3774 | if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) { |
3810 | ctxt->eip = c->eip; | 3775 | ctxt->eip = ctxt->_eip; |
3811 | goto done; | 3776 | goto done; |
3812 | } | 3777 | } |
3813 | } | 3778 | } |
3814 | 3779 | ||
3815 | if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) { | 3780 | if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) { |
3816 | rc = segmented_read(ctxt, c->src.addr.mem, | 3781 | rc = segmented_read(ctxt, ctxt->src.addr.mem, |
3817 | c->src.valptr, c->src.bytes); | 3782 | ctxt->src.valptr, ctxt->src.bytes); |
3818 | if (rc != X86EMUL_CONTINUE) | 3783 | if (rc != X86EMUL_CONTINUE) |
3819 | goto done; | 3784 | goto done; |
3820 | c->src.orig_val64 = c->src.val64; | 3785 | ctxt->src.orig_val64 = ctxt->src.val64; |
3821 | } | 3786 | } |
3822 | 3787 | ||
3823 | if (c->src2.type == OP_MEM) { | 3788 | if (ctxt->src2.type == OP_MEM) { |
3824 | rc = segmented_read(ctxt, c->src2.addr.mem, | 3789 | rc = segmented_read(ctxt, ctxt->src2.addr.mem, |
3825 | &c->src2.val, c->src2.bytes); | 3790 | &ctxt->src2.val, ctxt->src2.bytes); |
3826 | if (rc != X86EMUL_CONTINUE) | 3791 | if (rc != X86EMUL_CONTINUE) |
3827 | goto done; | 3792 | goto done; |
3828 | } | 3793 | } |
3829 | 3794 | ||
3830 | if ((c->d & DstMask) == ImplicitOps) | 3795 | if ((ctxt->d & DstMask) == ImplicitOps) |
3831 | goto special_insn; | 3796 | goto special_insn; |
3832 | 3797 | ||
3833 | 3798 | ||
3834 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 3799 | if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) { |
3835 | /* optimisation - avoid slow emulated read if Mov */ | 3800 | /* optimisation - avoid slow emulated read if Mov */ |
3836 | rc = segmented_read(ctxt, c->dst.addr.mem, | 3801 | rc = segmented_read(ctxt, ctxt->dst.addr.mem, |
3837 | &c->dst.val, c->dst.bytes); | 3802 | &ctxt->dst.val, ctxt->dst.bytes); |
3838 | if (rc != X86EMUL_CONTINUE) | 3803 | if (rc != X86EMUL_CONTINUE) |
3839 | goto done; | 3804 | goto done; |
3840 | } | 3805 | } |
3841 | c->dst.orig_val = c->dst.val; | 3806 | ctxt->dst.orig_val = ctxt->dst.val; |
3842 | 3807 | ||
3843 | special_insn: | 3808 | special_insn: |
3844 | 3809 | ||
3845 | if (unlikely(ctxt->guest_mode) && c->intercept) { | 3810 | if (unlikely(ctxt->guest_mode) && ctxt->intercept) { |
3846 | rc = emulator_check_intercept(ctxt, c->intercept, | 3811 | rc = emulator_check_intercept(ctxt, ctxt->intercept, |
3847 | X86_ICPT_POST_MEMACCESS); | 3812 | X86_ICPT_POST_MEMACCESS); |
3848 | if (rc != X86EMUL_CONTINUE) | 3813 | if (rc != X86EMUL_CONTINUE) |
3849 | goto done; | 3814 | goto done; |
3850 | } | 3815 | } |
3851 | 3816 | ||
3852 | if (c->execute) { | 3817 | if (ctxt->execute) { |
3853 | rc = c->execute(ctxt); | 3818 | rc = ctxt->execute(ctxt); |
3854 | if (rc != X86EMUL_CONTINUE) | 3819 | if (rc != X86EMUL_CONTINUE) |
3855 | goto done; | 3820 | goto done; |
3856 | goto writeback; | 3821 | goto writeback; |
3857 | } | 3822 | } |
3858 | 3823 | ||
3859 | if (c->twobyte) | 3824 | if (ctxt->twobyte) |
3860 | goto twobyte_insn; | 3825 | goto twobyte_insn; |
3861 | 3826 | ||
3862 | switch (c->b) { | 3827 | switch (ctxt->b) { |
3863 | case 0x06: /* push es */ | 3828 | case 0x06: /* push es */ |
3864 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); | 3829 | rc = emulate_push_sreg(ctxt, VCPU_SREG_ES); |
3865 | break; | 3830 | break; |
3866 | case 0x07: /* pop es */ | 3831 | case 0x07: /* pop es */ |
3867 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 3832 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_ES); |
3868 | break; | 3833 | break; |
3869 | case 0x0e: /* push cs */ | 3834 | case 0x0e: /* push cs */ |
3870 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); | 3835 | rc = emulate_push_sreg(ctxt, VCPU_SREG_CS); |
3871 | break; | 3836 | break; |
3872 | case 0x16: /* push ss */ | 3837 | case 0x16: /* push ss */ |
3873 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); | 3838 | rc = emulate_push_sreg(ctxt, VCPU_SREG_SS); |
3874 | break; | 3839 | break; |
3875 | case 0x17: /* pop ss */ | 3840 | case 0x17: /* pop ss */ |
3876 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 3841 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_SS); |
3877 | break; | 3842 | break; |
3878 | case 0x1e: /* push ds */ | 3843 | case 0x1e: /* push ds */ |
3879 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); | 3844 | rc = emulate_push_sreg(ctxt, VCPU_SREG_DS); |
3880 | break; | 3845 | break; |
3881 | case 0x1f: /* pop ds */ | 3846 | case 0x1f: /* pop ds */ |
3882 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 3847 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_DS); |
3883 | break; | 3848 | break; |
3884 | case 0x40 ... 0x47: /* inc r16/r32 */ | 3849 | case 0x40 ... 0x47: /* inc r16/r32 */ |
3885 | emulate_1op("inc", c->dst, ctxt->eflags); | 3850 | emulate_1op("inc", ctxt->dst, ctxt->eflags); |
3886 | break; | 3851 | break; |
3887 | case 0x48 ... 0x4f: /* dec r16/r32 */ | 3852 | case 0x48 ... 0x4f: /* dec r16/r32 */ |
3888 | emulate_1op("dec", c->dst, ctxt->eflags); | 3853 | emulate_1op("dec", ctxt->dst, ctxt->eflags); |
3889 | break; | 3854 | break; |
3890 | case 0x63: /* movsxd */ | 3855 | case 0x63: /* movsxd */ |
3891 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 3856 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
3892 | goto cannot_emulate; | 3857 | goto cannot_emulate; |
3893 | c->dst.val = (s32) c->src.val; | 3858 | ctxt->dst.val = (s32) ctxt->src.val; |
3894 | break; | 3859 | break; |
3895 | case 0x6c: /* insb */ | 3860 | case 0x6c: /* insb */ |
3896 | case 0x6d: /* insw/insd */ | 3861 | case 0x6d: /* insw/insd */ |
3897 | c->src.val = c->regs[VCPU_REGS_RDX]; | 3862 | ctxt->src.val = ctxt->regs[VCPU_REGS_RDX]; |
3898 | goto do_io_in; | 3863 | goto do_io_in; |
3899 | case 0x6e: /* outsb */ | 3864 | case 0x6e: /* outsb */ |
3900 | case 0x6f: /* outsw/outsd */ | 3865 | case 0x6f: /* outsw/outsd */ |
3901 | c->dst.val = c->regs[VCPU_REGS_RDX]; | 3866 | ctxt->dst.val = ctxt->regs[VCPU_REGS_RDX]; |
3902 | goto do_io_out; | 3867 | goto do_io_out; |
3903 | break; | 3868 | break; |
3904 | case 0x70 ... 0x7f: /* jcc (short) */ | 3869 | case 0x70 ... 0x7f: /* jcc (short) */ |
3905 | if (test_cc(c->b, ctxt->eflags)) | 3870 | if (test_cc(ctxt->b, ctxt->eflags)) |
3906 | jmp_rel(c, c->src.val); | 3871 | jmp_rel(ctxt, ctxt->src.val); |
3907 | break; | ||
3908 | case 0x84 ... 0x85: | ||
3909 | test: | ||
3910 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | ||
3911 | break; | ||
3912 | case 0x86 ... 0x87: /* xchg */ | ||
3913 | xchg: | ||
3914 | /* Write back the register source. */ | ||
3915 | c->src.val = c->dst.val; | ||
3916 | write_register_operand(&c->src); | ||
3917 | /* | ||
3918 | * Write back the memory destination with implicit LOCK | ||
3919 | * prefix. | ||
3920 | */ | ||
3921 | c->dst.val = c->src.orig_val; | ||
3922 | c->lock_prefix = 1; | ||
3923 | break; | ||
3924 | case 0x8c: /* mov r/m, sreg */ | ||
3925 | if (c->modrm_reg > VCPU_SREG_GS) { | ||
3926 | rc = emulate_ud(ctxt); | ||
3927 | goto done; | ||
3928 | } | ||
3929 | c->dst.val = get_segment_selector(ctxt, c->modrm_reg); | ||
3930 | break; | 3872 | break; |
3931 | case 0x8d: /* lea r16/r32, m */ | 3873 | case 0x8d: /* lea r16/r32, m */ |
3932 | c->dst.val = c->src.addr.mem.ea; | 3874 | ctxt->dst.val = ctxt->src.addr.mem.ea; |
3933 | break; | 3875 | break; |
3934 | case 0x8e: { /* mov seg, r/m16 */ | ||
3935 | uint16_t sel; | ||
3936 | |||
3937 | sel = c->src.val; | ||
3938 | |||
3939 | if (c->modrm_reg == VCPU_SREG_CS || | ||
3940 | c->modrm_reg > VCPU_SREG_GS) { | ||
3941 | rc = emulate_ud(ctxt); | ||
3942 | goto done; | ||
3943 | } | ||
3944 | |||
3945 | if (c->modrm_reg == VCPU_SREG_SS) | ||
3946 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; | ||
3947 | |||
3948 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); | ||
3949 | |||
3950 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
3951 | break; | ||
3952 | } | ||
3953 | case 0x8f: /* pop (sole member of Grp1a) */ | 3876 | case 0x8f: /* pop (sole member of Grp1a) */ |
3954 | rc = em_grp1a(ctxt); | 3877 | rc = em_grp1a(ctxt); |
3955 | break; | 3878 | break; |
3956 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ | 3879 | case 0x90 ... 0x97: /* nop / xchg reg, rax */ |
3957 | if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX]) | 3880 | if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) |
3958 | break; | 3881 | break; |
3959 | goto xchg; | 3882 | rc = em_xchg(ctxt); |
3883 | break; | ||
3960 | case 0x98: /* cbw/cwde/cdqe */ | 3884 | case 0x98: /* cbw/cwde/cdqe */ |
3961 | switch (c->op_bytes) { | 3885 | switch (ctxt->op_bytes) { |
3962 | case 2: c->dst.val = (s8)c->dst.val; break; | 3886 | case 2: ctxt->dst.val = (s8)ctxt->dst.val; break; |
3963 | case 4: c->dst.val = (s16)c->dst.val; break; | 3887 | case 4: ctxt->dst.val = (s16)ctxt->dst.val; break; |
3964 | case 8: c->dst.val = (s32)c->dst.val; break; | 3888 | case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; |
3965 | } | 3889 | } |
3966 | break; | 3890 | break; |
3967 | case 0xa8 ... 0xa9: /* test ax, imm */ | ||
3968 | goto test; | ||
3969 | case 0xc0 ... 0xc1: | 3891 | case 0xc0 ... 0xc1: |
3970 | rc = em_grp2(ctxt); | 3892 | rc = em_grp2(ctxt); |
3971 | break; | 3893 | break; |
3972 | case 0xc3: /* ret */ | ||
3973 | c->dst.type = OP_REG; | ||
3974 | c->dst.addr.reg = &c->eip; | ||
3975 | c->dst.bytes = c->op_bytes; | ||
3976 | rc = em_pop(ctxt); | ||
3977 | break; | ||
3978 | case 0xc4: /* les */ | 3894 | case 0xc4: /* les */ |
3979 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); | 3895 | rc = emulate_load_segment(ctxt, VCPU_SREG_ES); |
3980 | break; | 3896 | break; |
3981 | case 0xc5: /* lds */ | 3897 | case 0xc5: /* lds */ |
3982 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS); | 3898 | rc = emulate_load_segment(ctxt, VCPU_SREG_DS); |
3983 | break; | ||
3984 | case 0xcb: /* ret far */ | ||
3985 | rc = emulate_ret_far(ctxt, ops); | ||
3986 | break; | 3899 | break; |
3987 | case 0xcc: /* int3 */ | 3900 | case 0xcc: /* int3 */ |
3988 | irq = 3; | 3901 | rc = emulate_int(ctxt, 3); |
3989 | goto do_interrupt; | 3902 | break; |
3990 | case 0xcd: /* int n */ | 3903 | case 0xcd: /* int n */ |
3991 | irq = c->src.val; | 3904 | rc = emulate_int(ctxt, ctxt->src.val); |
3992 | do_interrupt: | ||
3993 | rc = emulate_int(ctxt, ops, irq); | ||
3994 | break; | 3905 | break; |
3995 | case 0xce: /* into */ | 3906 | case 0xce: /* into */ |
3996 | if (ctxt->eflags & EFLG_OF) { | 3907 | if (ctxt->eflags & EFLG_OF) |
3997 | irq = 4; | 3908 | rc = emulate_int(ctxt, 4); |
3998 | goto do_interrupt; | ||
3999 | } | ||
4000 | break; | ||
4001 | case 0xcf: /* iret */ | ||
4002 | rc = emulate_iret(ctxt, ops); | ||
4003 | break; | 3909 | break; |
4004 | case 0xd0 ... 0xd1: /* Grp2 */ | 3910 | case 0xd0 ... 0xd1: /* Grp2 */ |
4005 | rc = em_grp2(ctxt); | 3911 | rc = em_grp2(ctxt); |
4006 | break; | 3912 | break; |
4007 | case 0xd2 ... 0xd3: /* Grp2 */ | 3913 | case 0xd2 ... 0xd3: /* Grp2 */ |
4008 | c->src.val = c->regs[VCPU_REGS_RCX]; | 3914 | ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; |
4009 | rc = em_grp2(ctxt); | 3915 | rc = em_grp2(ctxt); |
4010 | break; | 3916 | break; |
4011 | case 0xe0 ... 0xe2: /* loop/loopz/loopnz */ | ||
4012 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
4013 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) != 0 && | ||
4014 | (c->b == 0xe2 || test_cc(c->b ^ 0x5, ctxt->eflags))) | ||
4015 | jmp_rel(c, c->src.val); | ||
4016 | break; | ||
4017 | case 0xe3: /* jcxz/jecxz/jrcxz */ | ||
4018 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) | ||
4019 | jmp_rel(c, c->src.val); | ||
4020 | break; | ||
4021 | case 0xe4: /* inb */ | 3917 | case 0xe4: /* inb */ |
4022 | case 0xe5: /* in */ | 3918 | case 0xe5: /* in */ |
4023 | goto do_io_in; | 3919 | goto do_io_in; |
@@ -4025,35 +3921,30 @@ special_insn: | |||
4025 | case 0xe7: /* out */ | 3921 | case 0xe7: /* out */ |
4026 | goto do_io_out; | 3922 | goto do_io_out; |
4027 | case 0xe8: /* call (near) */ { | 3923 | case 0xe8: /* call (near) */ { |
4028 | long int rel = c->src.val; | 3924 | long int rel = ctxt->src.val; |
4029 | c->src.val = (unsigned long) c->eip; | 3925 | ctxt->src.val = (unsigned long) ctxt->_eip; |
4030 | jmp_rel(c, rel); | 3926 | jmp_rel(ctxt, rel); |
4031 | rc = em_push(ctxt); | 3927 | rc = em_push(ctxt); |
4032 | break; | 3928 | break; |
4033 | } | 3929 | } |
4034 | case 0xe9: /* jmp rel */ | 3930 | case 0xe9: /* jmp rel */ |
4035 | goto jmp; | 3931 | case 0xeb: /* jmp rel short */ |
4036 | case 0xea: /* jmp far */ | 3932 | jmp_rel(ctxt, ctxt->src.val); |
4037 | rc = em_jmp_far(ctxt); | 3933 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
4038 | break; | ||
4039 | case 0xeb: | ||
4040 | jmp: /* jmp rel short */ | ||
4041 | jmp_rel(c, c->src.val); | ||
4042 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
4043 | break; | 3934 | break; |
4044 | case 0xec: /* in al,dx */ | 3935 | case 0xec: /* in al,dx */ |
4045 | case 0xed: /* in (e/r)ax,dx */ | 3936 | case 0xed: /* in (e/r)ax,dx */ |
4046 | do_io_in: | 3937 | do_io_in: |
4047 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 3938 | if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val, |
4048 | &c->dst.val)) | 3939 | &ctxt->dst.val)) |
4049 | goto done; /* IO is needed */ | 3940 | goto done; /* IO is needed */ |
4050 | break; | 3941 | break; |
4051 | case 0xee: /* out dx,al */ | 3942 | case 0xee: /* out dx,al */ |
4052 | case 0xef: /* out dx,(e/r)ax */ | 3943 | case 0xef: /* out dx,(e/r)ax */ |
4053 | do_io_out: | 3944 | do_io_out: |
4054 | ops->pio_out_emulated(ctxt, c->src.bytes, c->dst.val, | 3945 | ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val, |
4055 | &c->src.val, 1); | 3946 | &ctxt->src.val, 1); |
4056 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3947 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
4057 | break; | 3948 | break; |
4058 | case 0xf4: /* hlt */ | 3949 | case 0xf4: /* hlt */ |
4059 | ctxt->ops->halt(ctxt); | 3950 | ctxt->ops->halt(ctxt); |
@@ -4071,22 +3962,6 @@ special_insn: | |||
4071 | case 0xf9: /* stc */ | 3962 | case 0xf9: /* stc */ |
4072 | ctxt->eflags |= EFLG_CF; | 3963 | ctxt->eflags |= EFLG_CF; |
4073 | break; | 3964 | break; |
4074 | case 0xfa: /* cli */ | ||
4075 | if (emulator_bad_iopl(ctxt, ops)) { | ||
4076 | rc = emulate_gp(ctxt, 0); | ||
4077 | goto done; | ||
4078 | } else | ||
4079 | ctxt->eflags &= ~X86_EFLAGS_IF; | ||
4080 | break; | ||
4081 | case 0xfb: /* sti */ | ||
4082 | if (emulator_bad_iopl(ctxt, ops)) { | ||
4083 | rc = emulate_gp(ctxt, 0); | ||
4084 | goto done; | ||
4085 | } else { | ||
4086 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | ||
4087 | ctxt->eflags |= X86_EFLAGS_IF; | ||
4088 | } | ||
4089 | break; | ||
4090 | case 0xfc: /* cld */ | 3965 | case 0xfc: /* cld */ |
4091 | ctxt->eflags &= ~EFLG_DF; | 3966 | ctxt->eflags &= ~EFLG_DF; |
4092 | break; | 3967 | break; |
@@ -4115,40 +3990,40 @@ writeback: | |||
4115 | * restore dst type in case the decoding will be reused | 3990 | * restore dst type in case the decoding will be reused |
4116 | * (happens for string instruction ) | 3991 | * (happens for string instruction ) |
4117 | */ | 3992 | */ |
4118 | c->dst.type = saved_dst_type; | 3993 | ctxt->dst.type = saved_dst_type; |
4119 | 3994 | ||
4120 | if ((c->d & SrcMask) == SrcSI) | 3995 | if ((ctxt->d & SrcMask) == SrcSI) |
4121 | string_addr_inc(ctxt, seg_override(ctxt, c), | 3996 | string_addr_inc(ctxt, seg_override(ctxt), |
4122 | VCPU_REGS_RSI, &c->src); | 3997 | VCPU_REGS_RSI, &ctxt->src); |
4123 | 3998 | ||
4124 | if ((c->d & DstMask) == DstDI) | 3999 | if ((ctxt->d & DstMask) == DstDI) |
4125 | string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, | 4000 | string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, |
4126 | &c->dst); | 4001 | &ctxt->dst); |
4127 | 4002 | ||
4128 | if (c->rep_prefix && (c->d & String)) { | 4003 | if (ctxt->rep_prefix && (ctxt->d & String)) { |
4129 | struct read_cache *r = &ctxt->decode.io_read; | 4004 | struct read_cache *r = &ctxt->io_read; |
4130 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | 4005 | register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); |
4131 | 4006 | ||
4132 | if (!string_insn_completed(ctxt)) { | 4007 | if (!string_insn_completed(ctxt)) { |
4133 | /* | 4008 | /* |
4134 | * Re-enter guest when pio read ahead buffer is empty | 4009 | * Re-enter guest when pio read ahead buffer is empty |
4135 | * or, if it is not used, after each 1024 iteration. | 4010 | * or, if it is not used, after each 1024 iteration. |
4136 | */ | 4011 | */ |
4137 | if ((r->end != 0 || c->regs[VCPU_REGS_RCX] & 0x3ff) && | 4012 | if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) && |
4138 | (r->end == 0 || r->end != r->pos)) { | 4013 | (r->end == 0 || r->end != r->pos)) { |
4139 | /* | 4014 | /* |
4140 | * Reset read cache. Usually happens before | 4015 | * Reset read cache. Usually happens before |
4141 | * decode, but since instruction is restarted | 4016 | * decode, but since instruction is restarted |
4142 | * we have to do it here. | 4017 | * we have to do it here. |
4143 | */ | 4018 | */ |
4144 | ctxt->decode.mem_read.end = 0; | 4019 | ctxt->mem_read.end = 0; |
4145 | return EMULATION_RESTART; | 4020 | return EMULATION_RESTART; |
4146 | } | 4021 | } |
4147 | goto done; /* skip rip writeback */ | 4022 | goto done; /* skip rip writeback */ |
4148 | } | 4023 | } |
4149 | } | 4024 | } |
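As the comment above notes, when the pio read-ahead cache is not in use the "& 0x3ff" test lets emulation fall back to the guest once every 1024 iterations instead of chewing through an entire REP sequence in one stretch. A quick standalone check of that rate (not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long rcx, reentries = 0;

	/* The guest is re-entered whenever (rcx & 0x3ff) == 0. */
	for (rcx = 8192; rcx > 0; rcx--)
		if ((rcx & 0x3ff) == 0)
			reentries++;

	printf("%lu re-entries over 8192 iterations\n", reentries);  /* prints 8 */
	return 0;
}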
4150 | 4025 | ||
4151 | ctxt->eip = c->eip; | 4026 | ctxt->eip = ctxt->_eip; |
4152 | 4027 | ||
4153 | done: | 4028 | done: |
4154 | if (rc == X86EMUL_PROPAGATE_FAULT) | 4029 | if (rc == X86EMUL_PROPAGATE_FAULT) |
@@ -4159,13 +4034,7 @@ done: | |||
4159 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; | 4034 | return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; |
4160 | 4035 | ||
4161 | twobyte_insn: | 4036 | twobyte_insn: |
4162 | switch (c->b) { | 4037 | switch (ctxt->b) { |
4163 | case 0x05: /* syscall */ | ||
4164 | rc = emulate_syscall(ctxt, ops); | ||
4165 | break; | ||
4166 | case 0x06: | ||
4167 | rc = em_clts(ctxt); | ||
4168 | break; | ||
4169 | case 0x09: /* wbinvd */ | 4038 | case 0x09: /* wbinvd */ |
4170 | (ctxt->ops->wbinvd)(ctxt); | 4039 | (ctxt->ops->wbinvd)(ctxt); |
4171 | break; | 4040 | break; |
@@ -4174,21 +4043,21 @@ twobyte_insn: | |||
4174 | case 0x18: /* Grp16 (prefetch/nop) */ | 4043 | case 0x18: /* Grp16 (prefetch/nop) */ |
4175 | break; | 4044 | break; |
4176 | case 0x20: /* mov cr, reg */ | 4045 | case 0x20: /* mov cr, reg */ |
4177 | c->dst.val = ops->get_cr(ctxt, c->modrm_reg); | 4046 | ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); |
4178 | break; | 4047 | break; |
4179 | case 0x21: /* mov from dr to reg */ | 4048 | case 0x21: /* mov from dr to reg */ |
4180 | ops->get_dr(ctxt, c->modrm_reg, &c->dst.val); | 4049 | ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); |
4181 | break; | 4050 | break; |
4182 | case 0x22: /* mov reg, cr */ | 4051 | case 0x22: /* mov reg, cr */ |
4183 | if (ops->set_cr(ctxt, c->modrm_reg, c->src.val)) { | 4052 | if (ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) { |
4184 | emulate_gp(ctxt, 0); | 4053 | emulate_gp(ctxt, 0); |
4185 | rc = X86EMUL_PROPAGATE_FAULT; | 4054 | rc = X86EMUL_PROPAGATE_FAULT; |
4186 | goto done; | 4055 | goto done; |
4187 | } | 4056 | } |
4188 | c->dst.type = OP_NONE; | 4057 | ctxt->dst.type = OP_NONE; |
4189 | break; | 4058 | break; |
4190 | case 0x23: /* mov from reg to dr */ | 4059 | case 0x23: /* mov from reg to dr */ |
4191 | if (ops->set_dr(ctxt, c->modrm_reg, c->src.val & | 4060 | if (ops->set_dr(ctxt, ctxt->modrm_reg, ctxt->src.val & |
4192 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | 4061 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? |
4193 | ~0ULL : ~0U)) < 0) { | 4062 | ~0ULL : ~0U)) < 0) { |
4194 | /* #UD condition is already handled by the code above */ | 4063 | /* #UD condition is already handled by the code above */ |
@@ -4197,13 +4066,13 @@ twobyte_insn: | |||
4197 | goto done; | 4066 | goto done; |
4198 | } | 4067 | } |
4199 | 4068 | ||
4200 | c->dst.type = OP_NONE; /* no writeback */ | 4069 | ctxt->dst.type = OP_NONE; /* no writeback */ |
4201 | break; | 4070 | break; |
4202 | case 0x30: | 4071 | case 0x30: |
4203 | /* wrmsr */ | 4072 | /* wrmsr */ |
4204 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 4073 | msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] |
4205 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 4074 | | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); |
4206 | if (ops->set_msr(ctxt, c->regs[VCPU_REGS_RCX], msr_data)) { | 4075 | if (ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) { |
4207 | emulate_gp(ctxt, 0); | 4076 | emulate_gp(ctxt, 0); |
4208 | rc = X86EMUL_PROPAGATE_FAULT; | 4077 | rc = X86EMUL_PROPAGATE_FAULT; |
4209 | goto done; | 4078 | goto done; |
@@ -4212,64 +4081,58 @@ twobyte_insn: | |||
4212 | break; | 4081 | break; |
4213 | case 0x32: | 4082 | case 0x32: |
4214 | /* rdmsr */ | 4083 | /* rdmsr */ |
4215 | if (ops->get_msr(ctxt, c->regs[VCPU_REGS_RCX], &msr_data)) { | 4084 | if (ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) { |
4216 | emulate_gp(ctxt, 0); | 4085 | emulate_gp(ctxt, 0); |
4217 | rc = X86EMUL_PROPAGATE_FAULT; | 4086 | rc = X86EMUL_PROPAGATE_FAULT; |
4218 | goto done; | 4087 | goto done; |
4219 | } else { | 4088 | } else { |
4220 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 4089 | ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; |
4221 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 4090 | ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; |
4222 | } | 4091 | } |
4223 | rc = X86EMUL_CONTINUE; | 4092 | rc = X86EMUL_CONTINUE; |
4224 | break; | 4093 | break; |
4225 | case 0x34: /* sysenter */ | ||
4226 | rc = emulate_sysenter(ctxt, ops); | ||
4227 | break; | ||
4228 | case 0x35: /* sysexit */ | ||
4229 | rc = emulate_sysexit(ctxt, ops); | ||
4230 | break; | ||
4231 | case 0x40 ... 0x4f: /* cmov */ | 4094 | case 0x40 ... 0x4f: /* cmov */ |
4232 | c->dst.val = c->dst.orig_val = c->src.val; | 4095 | ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; |
4233 | if (!test_cc(c->b, ctxt->eflags)) | 4096 | if (!test_cc(ctxt->b, ctxt->eflags)) |
4234 | c->dst.type = OP_NONE; /* no writeback */ | 4097 | ctxt->dst.type = OP_NONE; /* no writeback */ |
4235 | break; | 4098 | break; |
4236 | case 0x80 ... 0x8f: /* jnz rel, etc*/ | 4099 | case 0x80 ... 0x8f: /* jnz rel, etc*/ |
4237 | if (test_cc(c->b, ctxt->eflags)) | 4100 | if (test_cc(ctxt->b, ctxt->eflags)) |
4238 | jmp_rel(c, c->src.val); | 4101 | jmp_rel(ctxt, ctxt->src.val); |
4239 | break; | 4102 | break; |
4240 | case 0x90 ... 0x9f: /* setcc r/m8 */ | 4103 | case 0x90 ... 0x9f: /* setcc r/m8 */ |
4241 | c->dst.val = test_cc(c->b, ctxt->eflags); | 4104 | ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); |
4242 | break; | 4105 | break; |
4243 | case 0xa0: /* push fs */ | 4106 | case 0xa0: /* push fs */ |
4244 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); | 4107 | rc = emulate_push_sreg(ctxt, VCPU_SREG_FS); |
4245 | break; | 4108 | break; |
4246 | case 0xa1: /* pop fs */ | 4109 | case 0xa1: /* pop fs */ |
4247 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 4110 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_FS); |
4248 | break; | 4111 | break; |
4249 | case 0xa3: | 4112 | case 0xa3: |
4250 | bt: /* bt */ | 4113 | bt: /* bt */ |
4251 | c->dst.type = OP_NONE; | 4114 | ctxt->dst.type = OP_NONE; |
4252 | /* only subword offset */ | 4115 | /* only subword offset */ |
4253 | c->src.val &= (c->dst.bytes << 3) - 1; | 4116 | ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; |
4254 | emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); | 4117 | emulate_2op_SrcV_nobyte("bt", ctxt->src, ctxt->dst, ctxt->eflags); |
4255 | break; | 4118 | break; |
4256 | case 0xa4: /* shld imm8, r, r/m */ | 4119 | case 0xa4: /* shld imm8, r, r/m */ |
4257 | case 0xa5: /* shld cl, r, r/m */ | 4120 | case 0xa5: /* shld cl, r, r/m */ |
4258 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 4121 | emulate_2op_cl("shld", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); |
4259 | break; | 4122 | break; |
4260 | case 0xa8: /* push gs */ | 4123 | case 0xa8: /* push gs */ |
4261 | rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); | 4124 | rc = emulate_push_sreg(ctxt, VCPU_SREG_GS); |
4262 | break; | 4125 | break; |
4263 | case 0xa9: /* pop gs */ | 4126 | case 0xa9: /* pop gs */ |
4264 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 4127 | rc = emulate_pop_sreg(ctxt, VCPU_SREG_GS); |
4265 | break; | 4128 | break; |
4266 | case 0xab: | 4129 | case 0xab: |
4267 | bts: /* bts */ | 4130 | bts: /* bts */ |
4268 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); | 4131 | emulate_2op_SrcV_nobyte("bts", ctxt->src, ctxt->dst, ctxt->eflags); |
4269 | break; | 4132 | break; |
4270 | case 0xac: /* shrd imm8, r, r/m */ | 4133 | case 0xac: /* shrd imm8, r, r/m */ |
4271 | case 0xad: /* shrd cl, r, r/m */ | 4134 | case 0xad: /* shrd cl, r, r/m */ |
4272 | emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); | 4135 | emulate_2op_cl("shrd", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags); |
4273 | break; | 4136 | break; |
4274 | case 0xae: /* clflush */ | 4137 | case 0xae: /* clflush */ |
4275 | break; | 4138 | break; |
@@ -4278,38 +4141,38 @@ twobyte_insn: | |||
4278 | * Save real source value, then compare EAX against | 4141 | * Save real source value, then compare EAX against |
4279 | * destination. | 4142 | * destination. |
4280 | */ | 4143 | */ |
4281 | c->src.orig_val = c->src.val; | 4144 | ctxt->src.orig_val = ctxt->src.val; |
4282 | c->src.val = c->regs[VCPU_REGS_RAX]; | 4145 | ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; |
4283 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | 4146 | emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags); |
4284 | if (ctxt->eflags & EFLG_ZF) { | 4147 | if (ctxt->eflags & EFLG_ZF) { |
4285 | /* Success: write back to memory. */ | 4148 | /* Success: write back to memory. */ |
4286 | c->dst.val = c->src.orig_val; | 4149 | ctxt->dst.val = ctxt->src.orig_val; |
4287 | } else { | 4150 | } else { |
4288 | /* Failure: write the value we saw to EAX. */ | 4151 | /* Failure: write the value we saw to EAX. */ |
4289 | c->dst.type = OP_REG; | 4152 | ctxt->dst.type = OP_REG; |
4290 | c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 4153 | ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; |
4291 | } | 4154 | } |
4292 | break; | 4155 | break; |
4293 | case 0xb2: /* lss */ | 4156 | case 0xb2: /* lss */ |
4294 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS); | 4157 | rc = emulate_load_segment(ctxt, VCPU_SREG_SS); |
4295 | break; | 4158 | break; |
4296 | case 0xb3: | 4159 | case 0xb3: |
4297 | btr: /* btr */ | 4160 | btr: /* btr */ |
4298 | emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags); | 4161 | emulate_2op_SrcV_nobyte("btr", ctxt->src, ctxt->dst, ctxt->eflags); |
4299 | break; | 4162 | break; |
4300 | case 0xb4: /* lfs */ | 4163 | case 0xb4: /* lfs */ |
4301 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS); | 4164 | rc = emulate_load_segment(ctxt, VCPU_SREG_FS); |
4302 | break; | 4165 | break; |
4303 | case 0xb5: /* lgs */ | 4166 | case 0xb5: /* lgs */ |
4304 | rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS); | 4167 | rc = emulate_load_segment(ctxt, VCPU_SREG_GS); |
4305 | break; | 4168 | break; |
4306 | case 0xb6 ... 0xb7: /* movzx */ | 4169 | case 0xb6 ... 0xb7: /* movzx */ |
4307 | c->dst.bytes = c->op_bytes; | 4170 | ctxt->dst.bytes = ctxt->op_bytes; |
4308 | c->dst.val = (c->d & ByteOp) ? (u8) c->src.val | 4171 | ctxt->dst.val = (ctxt->d & ByteOp) ? (u8) ctxt->src.val |
4309 | : (u16) c->src.val; | 4172 | : (u16) ctxt->src.val; |
4310 | break; | 4173 | break; |
4311 | case 0xba: /* Grp8 */ | 4174 | case 0xba: /* Grp8 */ |
4312 | switch (c->modrm_reg & 3) { | 4175 | switch (ctxt->modrm_reg & 3) { |
4313 | case 0: | 4176 | case 0: |
4314 | goto bt; | 4177 | goto bt; |
4315 | case 1: | 4178 | case 1: |
@@ -4322,47 +4185,47 @@ twobyte_insn: | |||
4322 | break; | 4185 | break; |
4323 | case 0xbb: | 4186 | case 0xbb: |
4324 | btc: /* btc */ | 4187 | btc: /* btc */ |
4325 | emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags); | 4188 | emulate_2op_SrcV_nobyte("btc", ctxt->src, ctxt->dst, ctxt->eflags); |
4326 | break; | 4189 | break; |
4327 | case 0xbc: { /* bsf */ | 4190 | case 0xbc: { /* bsf */ |
4328 | u8 zf; | 4191 | u8 zf; |
4329 | __asm__ ("bsf %2, %0; setz %1" | 4192 | __asm__ ("bsf %2, %0; setz %1" |
4330 | : "=r"(c->dst.val), "=q"(zf) | 4193 | : "=r"(ctxt->dst.val), "=q"(zf) |
4331 | : "r"(c->src.val)); | 4194 | : "r"(ctxt->src.val)); |
4332 | ctxt->eflags &= ~X86_EFLAGS_ZF; | 4195 | ctxt->eflags &= ~X86_EFLAGS_ZF; |
4333 | if (zf) { | 4196 | if (zf) { |
4334 | ctxt->eflags |= X86_EFLAGS_ZF; | 4197 | ctxt->eflags |= X86_EFLAGS_ZF; |
4335 | c->dst.type = OP_NONE; /* Disable writeback. */ | 4198 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
4336 | } | 4199 | } |
4337 | break; | 4200 | break; |
4338 | } | 4201 | } |
4339 | case 0xbd: { /* bsr */ | 4202 | case 0xbd: { /* bsr */ |
4340 | u8 zf; | 4203 | u8 zf; |
4341 | __asm__ ("bsr %2, %0; setz %1" | 4204 | __asm__ ("bsr %2, %0; setz %1" |
4342 | : "=r"(c->dst.val), "=q"(zf) | 4205 | : "=r"(ctxt->dst.val), "=q"(zf) |
4343 | : "r"(c->src.val)); | 4206 | : "r"(ctxt->src.val)); |
4344 | ctxt->eflags &= ~X86_EFLAGS_ZF; | 4207 | ctxt->eflags &= ~X86_EFLAGS_ZF; |
4345 | if (zf) { | 4208 | if (zf) { |
4346 | ctxt->eflags |= X86_EFLAGS_ZF; | 4209 | ctxt->eflags |= X86_EFLAGS_ZF; |
4347 | c->dst.type = OP_NONE; /* Disable writeback. */ | 4210 | ctxt->dst.type = OP_NONE; /* Disable writeback. */ |
4348 | } | 4211 | } |
4349 | break; | 4212 | break; |
4350 | } | 4213 | } |
4351 | case 0xbe ... 0xbf: /* movsx */ | 4214 | case 0xbe ... 0xbf: /* movsx */ |
4352 | c->dst.bytes = c->op_bytes; | 4215 | ctxt->dst.bytes = ctxt->op_bytes; |
4353 | c->dst.val = (c->d & ByteOp) ? (s8) c->src.val : | 4216 | ctxt->dst.val = (ctxt->d & ByteOp) ? (s8) ctxt->src.val : |
4354 | (s16) c->src.val; | 4217 | (s16) ctxt->src.val; |
4355 | break; | 4218 | break; |
4356 | case 0xc0 ... 0xc1: /* xadd */ | 4219 | case 0xc0 ... 0xc1: /* xadd */ |
4357 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 4220 | emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags); |
4358 | /* Write back the register source. */ | 4221 | /* Write back the register source. */ |
4359 | c->src.val = c->dst.orig_val; | 4222 | ctxt->src.val = ctxt->dst.orig_val; |
4360 | write_register_operand(&c->src); | 4223 | write_register_operand(&ctxt->src); |
4361 | break; | 4224 | break; |
4362 | case 0xc3: /* movnti */ | 4225 | case 0xc3: /* movnti */ |
4363 | c->dst.bytes = c->op_bytes; | 4226 | ctxt->dst.bytes = ctxt->op_bytes; |
4364 | c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val : | 4227 | ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : |
4365 | (u64) c->src.val; | 4228 | (u64) ctxt->src.val; |
4366 | break; | 4229 | break; |
4367 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 4230 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
4368 | rc = em_grp9(ctxt); | 4231 | rc = em_grp9(ctxt); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aee38623b768..1c5b69373a00 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include "mmu.h" | 22 | #include "mmu.h" |
23 | #include "x86.h" | 23 | #include "x86.h" |
24 | #include "kvm_cache_regs.h" | 24 | #include "kvm_cache_regs.h" |
25 | #include "x86.h" | ||
26 | 25 | ||
27 | #include <linux/kvm_host.h> | 26 | #include <linux/kvm_host.h> |
28 | #include <linux/types.h> | 27 | #include <linux/types.h> |
@@ -148,7 +147,7 @@ module_param(oos_shadow, bool, 0644); | |||
148 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 147 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ |
149 | | PT64_NX_MASK) | 148 | | PT64_NX_MASK) |
150 | 149 | ||
151 | #define RMAP_EXT 4 | 150 | #define PTE_LIST_EXT 4 |
152 | 151 | ||
153 | #define ACC_EXEC_MASK 1 | 152 | #define ACC_EXEC_MASK 1 |
154 | #define ACC_WRITE_MASK PT_WRITABLE_MASK | 153 | #define ACC_WRITE_MASK PT_WRITABLE_MASK |
@@ -164,16 +163,16 @@ module_param(oos_shadow, bool, 0644); | |||
164 | 163 | ||
165 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 164 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
166 | 165 | ||
167 | struct kvm_rmap_desc { | 166 | struct pte_list_desc { |
168 | u64 *sptes[RMAP_EXT]; | 167 | u64 *sptes[PTE_LIST_EXT]; |
169 | struct kvm_rmap_desc *more; | 168 | struct pte_list_desc *more; |
170 | }; | 169 | }; |
171 | 170 | ||
172 | struct kvm_shadow_walk_iterator { | 171 | struct kvm_shadow_walk_iterator { |
173 | u64 addr; | 172 | u64 addr; |
174 | hpa_t shadow_addr; | 173 | hpa_t shadow_addr; |
175 | int level; | ||
176 | u64 *sptep; | 174 | u64 *sptep; |
175 | int level; | ||
177 | unsigned index; | 176 | unsigned index; |
178 | }; | 177 | }; |
179 | 178 | ||
@@ -182,32 +181,68 @@ struct kvm_shadow_walk_iterator { | |||
182 | shadow_walk_okay(&(_walker)); \ | 181 | shadow_walk_okay(&(_walker)); \ |
183 | shadow_walk_next(&(_walker))) | 182 | shadow_walk_next(&(_walker))) |
184 | 183 | ||
185 | typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); | 184 | #define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte) \ |
185 | for (shadow_walk_init(&(_walker), _vcpu, _addr); \ | ||
186 | shadow_walk_okay(&(_walker)) && \ | ||
187 | ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \ | ||
188 | __shadow_walk_next(&(_walker), spte)) | ||
186 | 189 | ||
187 | static struct kmem_cache *pte_chain_cache; | 190 | static struct kmem_cache *pte_list_desc_cache; |
188 | static struct kmem_cache *rmap_desc_cache; | ||
189 | static struct kmem_cache *mmu_page_header_cache; | 191 | static struct kmem_cache *mmu_page_header_cache; |
190 | static struct percpu_counter kvm_total_used_mmu_pages; | 192 | static struct percpu_counter kvm_total_used_mmu_pages; |
191 | 193 | ||
192 | static u64 __read_mostly shadow_trap_nonpresent_pte; | ||
193 | static u64 __read_mostly shadow_notrap_nonpresent_pte; | ||
194 | static u64 __read_mostly shadow_nx_mask; | 194 | static u64 __read_mostly shadow_nx_mask; |
195 | static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | 195 | static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ |
196 | static u64 __read_mostly shadow_user_mask; | 196 | static u64 __read_mostly shadow_user_mask; |
197 | static u64 __read_mostly shadow_accessed_mask; | 197 | static u64 __read_mostly shadow_accessed_mask; |
198 | static u64 __read_mostly shadow_dirty_mask; | 198 | static u64 __read_mostly shadow_dirty_mask; |
199 | static u64 __read_mostly shadow_mmio_mask; | ||
199 | 200 | ||
200 | static inline u64 rsvd_bits(int s, int e) | 201 | static void mmu_spte_set(u64 *sptep, u64 spte); |
202 | |||
203 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) | ||
201 | { | 204 | { |
202 | return ((1ULL << (e - s + 1)) - 1) << s; | 205 | shadow_mmio_mask = mmio_mask; |
203 | } | 206 | } |
207 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | ||
204 | 208 | ||
205 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 209 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) |
206 | { | 210 | { |
207 | shadow_trap_nonpresent_pte = trap_pte; | 211 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
208 | shadow_notrap_nonpresent_pte = notrap_pte; | 212 | |
213 | trace_mark_mmio_spte(sptep, gfn, access); | ||
214 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); | ||
215 | } | ||
216 | |||
217 | static bool is_mmio_spte(u64 spte) | ||
218 | { | ||
219 | return (spte & shadow_mmio_mask) == shadow_mmio_mask; | ||
220 | } | ||
221 | |||
222 | static gfn_t get_mmio_spte_gfn(u64 spte) | ||
223 | { | ||
224 | return (spte & ~shadow_mmio_mask) >> PAGE_SHIFT; | ||
225 | } | ||
226 | |||
227 | static unsigned get_mmio_spte_access(u64 spte) | ||
228 | { | ||
229 | return (spte & ~shadow_mmio_mask) & ~PAGE_MASK; | ||
230 | } | ||
231 | |||
232 | static bool set_mmio_spte(u64 *sptep, gfn_t gfn, pfn_t pfn, unsigned access) | ||
233 | { | ||
234 | if (unlikely(is_noslot_pfn(pfn))) { | ||
235 | mark_mmio_spte(sptep, gfn, access); | ||
236 | return true; | ||
237 | } | ||
238 | |||
239 | return false; | ||
240 | } | ||
241 | |||
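
A rough standalone sketch of the packing performed by mark_mmio_spte() and undone by get_mmio_spte_gfn()/get_mmio_spte_access(); the mask value here is an arbitrary stand-in for shadow_mmio_mask, not the one KVM installs via kvm_mmu_set_mmio_spte_mask():

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1ULL << PAGE_SHIFT) - 1))

/* Arbitrary example mask; the real value is set by kvm_mmu_set_mmio_spte_mask(). */
static const uint64_t mmio_mask = 0xffull << 49;

static uint64_t pack_mmio_spte(uint64_t gfn, unsigned int access)
{
	return mmio_mask | access | (gfn << PAGE_SHIFT);
}

int main(void)
{
	uint64_t spte = pack_mmio_spte(0x1234, 0x3);
	uint64_t gfn = (spte & ~mmio_mask) >> PAGE_SHIFT;		/* as in get_mmio_spte_gfn() */
	unsigned int access = (spte & ~mmio_mask) & ~PAGE_MASK;		/* as in get_mmio_spte_access() */

	printf("gfn=%#llx access=%#x\n", (unsigned long long)gfn, access);
	return 0;
}
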
242 | static inline u64 rsvd_bits(int s, int e) | ||
243 | { | ||
244 | return ((1ULL << (e - s + 1)) - 1) << s; | ||
209 | } | 245 | } |
210 | EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes); | ||
211 | 246 | ||
212 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 247 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
213 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 248 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
@@ -220,11 +255,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
220 | } | 255 | } |
221 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 256 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
222 | 257 | ||
223 | static bool is_write_protection(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | ||
226 | } | ||
227 | |||
228 | static int is_cpuid_PSE36(void) | 258 | static int is_cpuid_PSE36(void) |
229 | { | 259 | { |
230 | return 1; | 260 | return 1; |
@@ -237,8 +267,7 @@ static int is_nx(struct kvm_vcpu *vcpu) | |||
237 | 267 | ||
238 | static int is_shadow_present_pte(u64 pte) | 268 | static int is_shadow_present_pte(u64 pte) |
239 | { | 269 | { |
240 | return pte != shadow_trap_nonpresent_pte | 270 | return pte & PT_PRESENT_MASK && !is_mmio_spte(pte); |
241 | && pte != shadow_notrap_nonpresent_pte; | ||
242 | } | 271 | } |
243 | 272 | ||
244 | static int is_large_pte(u64 pte) | 273 | static int is_large_pte(u64 pte) |
@@ -246,11 +275,6 @@ static int is_large_pte(u64 pte) | |||
246 | return pte & PT_PAGE_SIZE_MASK; | 275 | return pte & PT_PAGE_SIZE_MASK; |
247 | } | 276 | } |
248 | 277 | ||
249 | static int is_writable_pte(unsigned long pte) | ||
250 | { | ||
251 | return pte & PT_WRITABLE_MASK; | ||
252 | } | ||
253 | |||
254 | static int is_dirty_gpte(unsigned long pte) | 278 | static int is_dirty_gpte(unsigned long pte) |
255 | { | 279 | { |
256 | return pte & PT_DIRTY_MASK; | 280 | return pte & PT_DIRTY_MASK; |
@@ -282,26 +306,154 @@ static gfn_t pse36_gfn_delta(u32 gpte) | |||
282 | return (gpte & PT32_DIR_PSE36_MASK) << shift; | 306 | return (gpte & PT32_DIR_PSE36_MASK) << shift; |
283 | } | 307 | } |
284 | 308 | ||
309 | #ifdef CONFIG_X86_64 | ||
285 | static void __set_spte(u64 *sptep, u64 spte) | 310 | static void __set_spte(u64 *sptep, u64 spte) |
286 | { | 311 | { |
287 | set_64bit(sptep, spte); | 312 | *sptep = spte; |
288 | } | 313 | } |
289 | 314 | ||
290 | static u64 __xchg_spte(u64 *sptep, u64 new_spte) | 315 | static void __update_clear_spte_fast(u64 *sptep, u64 spte) |
291 | { | 316 | { |
292 | #ifdef CONFIG_X86_64 | 317 | *sptep = spte; |
293 | return xchg(sptep, new_spte); | 318 | } |
319 | |||
320 | static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) | ||
321 | { | ||
322 | return xchg(sptep, spte); | ||
323 | } | ||
324 | |||
325 | static u64 __get_spte_lockless(u64 *sptep) | ||
326 | { | ||
327 | return ACCESS_ONCE(*sptep); | ||
328 | } | ||
329 | |||
330 | static bool __check_direct_spte_mmio_pf(u64 spte) | ||
331 | { | ||
332 | /* It is valid if the spte is zapped. */ | ||
333 | return spte == 0ull; | ||
334 | } | ||
294 | #else | 335 | #else |
295 | u64 old_spte; | 336 | union split_spte { |
337 | struct { | ||
338 | u32 spte_low; | ||
339 | u32 spte_high; | ||
340 | }; | ||
341 | u64 spte; | ||
342 | }; | ||
296 | 343 | ||
297 | do { | 344 | static void count_spte_clear(u64 *sptep, u64 spte) |
298 | old_spte = *sptep; | 345 | { |
299 | } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte); | 346 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
300 | 347 | ||
301 | return old_spte; | 348 | if (is_shadow_present_pte(spte)) |
302 | #endif | 349 | return; |
350 | |||
351 | /* Ensure the spte is completely set before we increase the count */ | ||
352 | smp_wmb(); | ||
353 | sp->clear_spte_count++; | ||
354 | } | ||
355 | |||
356 | static void __set_spte(u64 *sptep, u64 spte) | ||
357 | { | ||
358 | union split_spte *ssptep, sspte; | ||
359 | |||
360 | ssptep = (union split_spte *)sptep; | ||
361 | sspte = (union split_spte)spte; | ||
362 | |||
363 | ssptep->spte_high = sspte.spte_high; | ||
364 | |||
365 | /* | ||
366 | * If we map the spte from nonpresent to present, we should store | ||
367 | * the high bits first and only then set the present bit, so the CPU | ||
368 | * cannot fetch this spte while we are still setting it. | ||
369 | */ | ||
370 | smp_wmb(); | ||
371 | |||
372 | ssptep->spte_low = sspte.spte_low; | ||
303 | } | 373 | } |
304 | 374 | ||
375 | static void __update_clear_spte_fast(u64 *sptep, u64 spte) | ||
376 | { | ||
377 | union split_spte *ssptep, sspte; | ||
378 | |||
379 | ssptep = (union split_spte *)sptep; | ||
380 | sspte = (union split_spte)spte; | ||
381 | |||
382 | ssptep->spte_low = sspte.spte_low; | ||
383 | |||
384 | /* | ||
385 | * If we map the spte from present to nonpresent, we should clear | ||
386 | * the present bit first so the vcpu cannot fetch the stale high bits. | ||
387 | */ | ||
388 | smp_wmb(); | ||
389 | |||
390 | ssptep->spte_high = sspte.spte_high; | ||
391 | count_spte_clear(sptep, spte); | ||
392 | } | ||
393 | |||
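
The two ordering comments above can be modelled in userspace: when going nonpresent->present, publish the high half before the (present-carrying) low half; when going present->nonpresent, clear the low half first. A hedged sketch, assuming a little-endian split like the kernel's union split_spte, with __sync_synchronize() standing in for smp_wmb():

#include <stdint.h>

/* Assumes a little-endian low/high split, like the kernel's union split_spte. */
union split64 {
	struct {
		volatile uint32_t low;
		volatile uint32_t high;
	};
	uint64_t val;
};

static void set_present(union split64 *s, uint64_t v)
{
	union split64 n = { .val = v };

	s->high = n.high;
	__sync_synchronize();	/* stands in for smp_wmb() */
	s->low = n.low;		/* the low half carries the present bit */
}

static void clear_present(union split64 *s)
{
	s->low = 0;		/* drop the present bit first */
	__sync_synchronize();
	s->high = 0;
}

int main(void)
{
	union split64 s = { .val = 0 };

	set_present(&s, 0x1234000000000001ull);
	clear_present(&s);
	return s.val == 0 ? 0 : 1;
}
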
394 | static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) | ||
395 | { | ||
396 | union split_spte *ssptep, sspte, orig; | ||
397 | |||
398 | ssptep = (union split_spte *)sptep; | ||
399 | sspte = (union split_spte)spte; | ||
400 | |||
401 | /* xchg acts as a barrier before the setting of the high bits */ | ||
402 | orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low); | ||
403 | orig.spte_high = ssptep->spte_high = sspte.spte_high; | ||
404 | count_spte_clear(sptep, spte); | ||
405 | |||
406 | return orig.spte; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * The idea of reading the spte locklessly on x86_32 comes from | ||
411 | * gup_get_pte(arch/x86/mm/gup.c). | ||
412 | * The difference is that we cannot catch the spte TLB flush if we leave | ||
413 | * guest mode, so we emulate it by increasing clear_spte_count whenever a | ||
414 | * spte is cleared. | ||
415 | */ | ||
416 | static u64 __get_spte_lockless(u64 *sptep) | ||
417 | { | ||
418 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
419 | union split_spte spte, *orig = (union split_spte *)sptep; | ||
420 | int count; | ||
421 | |||
422 | retry: | ||
423 | count = sp->clear_spte_count; | ||
424 | smp_rmb(); | ||
425 | |||
426 | spte.spte_low = orig->spte_low; | ||
427 | smp_rmb(); | ||
428 | |||
429 | spte.spte_high = orig->spte_high; | ||
430 | smp_rmb(); | ||
431 | |||
432 | if (unlikely(spte.spte_low != orig->spte_low || | ||
433 | count != sp->clear_spte_count)) | ||
434 | goto retry; | ||
435 | |||
436 | return spte.spte; | ||
437 | } | ||
438 | |||
439 | static bool __check_direct_spte_mmio_pf(u64 spte) | ||
440 | { | ||
441 | union split_spte sspte = (union split_spte)spte; | ||
442 | u32 high_mmio_mask = shadow_mmio_mask >> 32; | ||
443 | |||
444 | /* It is valid if the spte is zapped. */ | ||
445 | if (spte == 0ull) | ||
446 | return true; | ||
447 | |||
448 | /* It is valid if the spte is being zapped. */ | ||
449 | if (sspte.spte_low == 0ull && | ||
450 | (sspte.spte_high & high_mmio_mask) == high_mmio_mask) | ||
451 | return true; | ||
452 | |||
453 | return false; | ||
454 | } | ||
455 | #endif | ||
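
The retry loop in __get_spte_lockless() is essentially a hand-rolled sequence lock keyed on spte_low and clear_spte_count: if either the low half or the counter moved between the two samples, the 64-bit value may have been torn and the read is retried. A standalone model of the reader side, with illustrative names and __sync_synchronize() standing in for smp_rmb():

#include <stdint.h>

struct model_spte {
	volatile uint32_t low;
	volatile uint32_t high;
	volatile int clear_count;	/* bumped whenever an spte is cleared */
};

static uint64_t read_spte_lockless(struct model_spte *s)
{
	uint32_t lo, hi;
	int count;

retry:
	count = s->clear_count;
	__sync_synchronize();		/* stands in for smp_rmb() */

	lo = s->low;
	__sync_synchronize();
	hi = s->high;
	__sync_synchronize();

	/* A torn read is possible if the low half or the counter moved. */
	if (lo != s->low || count != s->clear_count)
		goto retry;

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	struct model_spte s = { .low = 0x5678, .high = 0x1234, .clear_count = 0 };

	return read_spte_lockless(&s) == 0x0000123400005678ull ? 0 : 1;
}
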
456 | |||
305 | static bool spte_has_volatile_bits(u64 spte) | 457 | static bool spte_has_volatile_bits(u64 spte) |
306 | { | 458 | { |
307 | if (!shadow_accessed_mask) | 459 | if (!shadow_accessed_mask) |
@@ -322,12 +474,30 @@ static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) | |||
322 | return (old_spte & bit_mask) && !(new_spte & bit_mask); | 474 | return (old_spte & bit_mask) && !(new_spte & bit_mask); |
323 | } | 475 | } |
324 | 476 | ||
325 | static void update_spte(u64 *sptep, u64 new_spte) | 477 | /* Rules for using mmu_spte_set: |
478 | * Set the sptep from nonpresent to present. | ||
479 | * Note: the sptep being assigned *must* be either not present | ||
480 | * or in a state where the hardware will not attempt to update | ||
481 | * the spte. | ||
482 | */ | ||
483 | static void mmu_spte_set(u64 *sptep, u64 new_spte) | ||
484 | { | ||
485 | WARN_ON(is_shadow_present_pte(*sptep)); | ||
486 | __set_spte(sptep, new_spte); | ||
487 | } | ||
488 | |||
489 | /* Rules for using mmu_spte_update: | ||
490 | * Update the state bits; this implies the mapped pfn is not changed. | ||
491 | */ | ||
492 | static void mmu_spte_update(u64 *sptep, u64 new_spte) | ||
326 | { | 493 | { |
327 | u64 mask, old_spte = *sptep; | 494 | u64 mask, old_spte = *sptep; |
328 | 495 | ||
329 | WARN_ON(!is_rmap_spte(new_spte)); | 496 | WARN_ON(!is_rmap_spte(new_spte)); |
330 | 497 | ||
498 | if (!is_shadow_present_pte(old_spte)) | ||
499 | return mmu_spte_set(sptep, new_spte); | ||
500 | |||
331 | new_spte |= old_spte & shadow_dirty_mask; | 501 | new_spte |= old_spte & shadow_dirty_mask; |
332 | 502 | ||
333 | mask = shadow_accessed_mask; | 503 | mask = shadow_accessed_mask; |
@@ -335,9 +505,9 @@ static void update_spte(u64 *sptep, u64 new_spte) | |||
335 | mask |= shadow_dirty_mask; | 505 | mask |= shadow_dirty_mask; |
336 | 506 | ||
337 | if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask) | 507 | if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask) |
338 | __set_spte(sptep, new_spte); | 508 | __update_clear_spte_fast(sptep, new_spte); |
339 | else | 509 | else |
340 | old_spte = __xchg_spte(sptep, new_spte); | 510 | old_spte = __update_clear_spte_slow(sptep, new_spte); |
341 | 511 | ||
342 | if (!shadow_accessed_mask) | 512 | if (!shadow_accessed_mask) |
343 | return; | 513 | return; |
@@ -348,6 +518,64 @@ static void update_spte(u64 *sptep, u64 new_spte) | |||
348 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); | 518 | kvm_set_pfn_dirty(spte_to_pfn(old_spte)); |
349 | } | 519 | } |
350 | 520 | ||
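
A simplified model of the fast-versus-slow choice in mmu_spte_update() above: a plain store suffices unless the old spte still has hardware-settable accessed/dirty bits to lose and the new value does not already carry them. This ignores the presence and writability checks of the real spte_has_volatile_bits(); the bit positions below are illustrative only:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative bit positions only. */
#define ACCESSED	(1ull << 5)
#define DIRTY		(1ull << 6)

static bool needs_atomic_update(uint64_t old_spte, uint64_t new_spte)
{
	uint64_t ad_bits = ACCESSED | DIRTY;

	/* Hardware may still set A/D on the old spte... */
	bool old_is_volatile = (old_spte & ad_bits) != ad_bits;
	/* ...but nothing can be lost if the new value already carries them. */
	bool new_covers_them = (new_spte & ad_bits) == ad_bits;

	return old_is_volatile && !new_covers_them;	/* else a plain store is fine */
}

int main(void)
{
	/* A/D still clear on the old spte and not carried by the new one: use xchg. */
	return needs_atomic_update(0x1000, 0x2000) ? 0 : 1;
}
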
521 | /* | ||
522 | * Rules for using mmu_spte_clear_track_bits: | ||
523 | * It sets the sptep from present to nonpresent and tracks the | ||
524 | * state bits; it is used to clear the last-level sptep. | ||
525 | */ | ||
526 | static int mmu_spte_clear_track_bits(u64 *sptep) | ||
527 | { | ||
528 | pfn_t pfn; | ||
529 | u64 old_spte = *sptep; | ||
530 | |||
531 | if (!spte_has_volatile_bits(old_spte)) | ||
532 | __update_clear_spte_fast(sptep, 0ull); | ||
533 | else | ||
534 | old_spte = __update_clear_spte_slow(sptep, 0ull); | ||
535 | |||
536 | if (!is_rmap_spte(old_spte)) | ||
537 | return 0; | ||
538 | |||
539 | pfn = spte_to_pfn(old_spte); | ||
540 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
541 | kvm_set_pfn_accessed(pfn); | ||
542 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | ||
543 | kvm_set_pfn_dirty(pfn); | ||
544 | return 1; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Rules for using mmu_spte_clear_no_track: | ||
549 | * Directly clear the spte without caring about the state bits of the sptep; | ||
550 | * it is used to set the upper-level spte. | ||
551 | */ | ||
552 | static void mmu_spte_clear_no_track(u64 *sptep) | ||
553 | { | ||
554 | __update_clear_spte_fast(sptep, 0ull); | ||
555 | } | ||
556 | |||
557 | static u64 mmu_spte_get_lockless(u64 *sptep) | ||
558 | { | ||
559 | return __get_spte_lockless(sptep); | ||
560 | } | ||
561 | |||
562 | static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) | ||
563 | { | ||
564 | rcu_read_lock(); | ||
565 | atomic_inc(&vcpu->kvm->arch.reader_counter); | ||
566 | |||
567 | /* Increase the counter before walking the shadow page table */ | ||
568 | smp_mb__after_atomic_inc(); | ||
569 | } | ||
570 | |||
571 | static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) | ||
572 | { | ||
573 | /* Decrease the counter after the shadow page table walk has finished */ | ||
574 | smp_mb__before_atomic_dec(); | ||
575 | atomic_dec(&vcpu->kvm->arch.reader_counter); | ||
576 | rcu_read_unlock(); | ||
577 | } | ||
578 | |||
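
A minimal userspace model of what the begin/end helpers above buy us: walkers bump a shared counter around their lockless walk, and the zap path (see kvm_mmu_commit_zap_page() further down) frees pages immediately only when no walker is in flight, otherwise deferring via RCU. The names and the immediate/deferred decision below are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int reader_counter;

static void walk_begin(void) { atomic_fetch_add(&reader_counter, 1); }
static void walk_end(void)   { atomic_fetch_sub(&reader_counter, 1); }

/* Zap side: only free immediately when no lockless walker is in flight. */
static bool can_free_immediately(void)
{
	return atomic_load(&reader_counter) == 0;
}

int main(void)
{
	bool deferred;

	walk_begin();
	deferred = !can_free_immediately();	/* true: would defer the free via RCU */
	walk_end();

	return (deferred && can_free_immediately()) ? 0 : 1;
}
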
351 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 579 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
352 | struct kmem_cache *base_cache, int min) | 580 | struct kmem_cache *base_cache, int min) |
353 | { | 581 | { |
@@ -397,12 +625,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) | |||
397 | { | 625 | { |
398 | int r; | 626 | int r; |
399 | 627 | ||
400 | r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache, | 628 | r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, |
401 | pte_chain_cache, 4); | 629 | pte_list_desc_cache, 8 + PTE_PREFETCH_NUM); |
402 | if (r) | ||
403 | goto out; | ||
404 | r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, | ||
405 | rmap_desc_cache, 4 + PTE_PREFETCH_NUM); | ||
406 | if (r) | 630 | if (r) |
407 | goto out; | 631 | goto out; |
408 | r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); | 632 | r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); |
@@ -416,8 +640,8 @@ out: | |||
416 | 640 | ||
417 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 641 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
418 | { | 642 | { |
419 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); | 643 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, |
420 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); | 644 | pte_list_desc_cache); |
421 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); | 645 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); |
422 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, | 646 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, |
423 | mmu_page_header_cache); | 647 | mmu_page_header_cache); |
@@ -433,26 +657,15 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | |||
433 | return p; | 657 | return p; |
434 | } | 658 | } |
435 | 659 | ||
436 | static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) | 660 | static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) |
437 | { | 661 | { |
438 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache, | 662 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache, |
439 | sizeof(struct kvm_pte_chain)); | 663 | sizeof(struct pte_list_desc)); |
440 | } | 664 | } |
441 | 665 | ||
442 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) | 666 | static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) |
443 | { | 667 | { |
444 | kmem_cache_free(pte_chain_cache, pc); | 668 | kmem_cache_free(pte_list_desc_cache, pte_list_desc); |
445 | } | ||
446 | |||
447 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | ||
448 | { | ||
449 | return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache, | ||
450 | sizeof(struct kvm_rmap_desc)); | ||
451 | } | ||
452 | |||
453 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | ||
454 | { | ||
455 | kmem_cache_free(rmap_desc_cache, rd); | ||
456 | } | 669 | } |
457 | 670 | ||
458 | static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) | 671 | static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) |
@@ -498,6 +711,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
498 | linfo = lpage_info_slot(gfn, slot, i); | 711 | linfo = lpage_info_slot(gfn, slot, i); |
499 | linfo->write_count += 1; | 712 | linfo->write_count += 1; |
500 | } | 713 | } |
714 | kvm->arch.indirect_shadow_pages++; | ||
501 | } | 715 | } |
502 | 716 | ||
503 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | 717 | static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) |
@@ -513,6 +727,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
513 | linfo->write_count -= 1; | 727 | linfo->write_count -= 1; |
514 | WARN_ON(linfo->write_count < 0); | 728 | WARN_ON(linfo->write_count < 0); |
515 | } | 729 | } |
730 | kvm->arch.indirect_shadow_pages--; | ||
516 | } | 731 | } |
517 | 732 | ||
518 | static int has_wrprotected_page(struct kvm *kvm, | 733 | static int has_wrprotected_page(struct kvm *kvm, |
@@ -588,67 +803,42 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
588 | } | 803 | } |
589 | 804 | ||
590 | /* | 805 | /* |
591 | * Take gfn and return the reverse mapping to it. | 806 | * Pte mapping structures: |
592 | */ | ||
593 | |||
594 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | ||
595 | { | ||
596 | struct kvm_memory_slot *slot; | ||
597 | struct kvm_lpage_info *linfo; | ||
598 | |||
599 | slot = gfn_to_memslot(kvm, gfn); | ||
600 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
601 | return &slot->rmap[gfn - slot->base_gfn]; | ||
602 | |||
603 | linfo = lpage_info_slot(gfn, slot, level); | ||
604 | |||
605 | return &linfo->rmap_pde; | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * Reverse mapping data structures: | ||
610 | * | 807 | * |
611 | * If rmapp bit zero is zero, then rmapp point to the shadw page table entry | 808 | * If pte_list bit zero is zero, then pte_list points to the spte. |
612 | * that points to page_address(page). | ||
613 | * | 809 | * |
614 | * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc | 810 | * If pte_list bit zero is one, (then pte_list & ~1) points to a struct |
615 | * containing more mappings. | 811 | * pte_list_desc containing more mappings. |
616 | * | 812 | * |
617 | * Returns the number of rmap entries before the spte was added or zero if | 813 | * Returns the number of pte entries before the spte was added or zero if |
618 | * the spte was not added. | 814 | * the spte was not added. |
619 | * | 815 | * |
620 | */ | 816 | */ |
621 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 817 | static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, |
818 | unsigned long *pte_list) | ||
622 | { | 819 | { |
623 | struct kvm_mmu_page *sp; | 820 | struct pte_list_desc *desc; |
624 | struct kvm_rmap_desc *desc; | ||
625 | unsigned long *rmapp; | ||
626 | int i, count = 0; | 821 | int i, count = 0; |
627 | 822 | ||
628 | if (!is_rmap_spte(*spte)) | 823 | if (!*pte_list) { |
629 | return count; | 824 | rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); |
630 | sp = page_header(__pa(spte)); | 825 | *pte_list = (unsigned long)spte; |
631 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); | 826 | } else if (!(*pte_list & 1)) { |
632 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 827 | rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); |
633 | if (!*rmapp) { | 828 | desc = mmu_alloc_pte_list_desc(vcpu); |
634 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 829 | desc->sptes[0] = (u64 *)*pte_list; |
635 | *rmapp = (unsigned long)spte; | ||
636 | } else if (!(*rmapp & 1)) { | ||
637 | rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); | ||
638 | desc = mmu_alloc_rmap_desc(vcpu); | ||
639 | desc->sptes[0] = (u64 *)*rmapp; | ||
640 | desc->sptes[1] = spte; | 830 | desc->sptes[1] = spte; |
641 | *rmapp = (unsigned long)desc | 1; | 831 | *pte_list = (unsigned long)desc | 1; |
642 | ++count; | 832 | ++count; |
643 | } else { | 833 | } else { |
644 | rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); | 834 | rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); |
645 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 835 | desc = (struct pte_list_desc *)(*pte_list & ~1ul); |
646 | while (desc->sptes[RMAP_EXT-1] && desc->more) { | 836 | while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { |
647 | desc = desc->more; | 837 | desc = desc->more; |
648 | count += RMAP_EXT; | 838 | count += PTE_LIST_EXT; |
649 | } | 839 | } |
650 | if (desc->sptes[RMAP_EXT-1]) { | 840 | if (desc->sptes[PTE_LIST_EXT-1]) { |
651 | desc->more = mmu_alloc_rmap_desc(vcpu); | 841 | desc->more = mmu_alloc_pte_list_desc(vcpu); |
652 | desc = desc->more; | 842 | desc = desc->more; |
653 | } | 843 | } |
654 | for (i = 0; desc->sptes[i]; ++i) | 844 | for (i = 0; desc->sptes[i]; ++i) |
@@ -658,59 +848,78 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
658 | return count; | 848 | return count; |
659 | } | 849 | } |
660 | 850 | ||
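
A standalone sketch of the tagged-pointer encoding described in the comment above pte_list_add(): bit zero of the pte_list word distinguishes a lone spte pointer from a pointer to a pte_list_desc chain. The structure and names below are simplified stand-ins:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define LIST_EXT	4

struct list_desc {
	uint64_t *sptes[LIST_EXT];
	struct list_desc *more;
};

static size_t count_entries(unsigned long pte_list)
{
	struct list_desc *desc;
	size_t n = 0;
	int i;

	if (!pte_list)
		return 0;
	if (!(pte_list & 1))				/* bit zero clear: a single spte */
		return 1;

	desc = (struct list_desc *)(pte_list & ~1ul);	/* bit zero set: descriptor chain */
	for (; desc; desc = desc->more)
		for (i = 0; i < LIST_EXT && desc->sptes[i]; i++)
			n++;
	return n;
}

int main(void)
{
	uint64_t a = 0, b = 0;
	struct list_desc d = { .sptes = { &a, &b } };
	unsigned long single = (unsigned long)&a;
	unsigned long many = (unsigned long)&d | 1;

	printf("%zu %zu\n", count_entries(single), count_entries(many));	/* prints: 1 2 */
	return 0;
}
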
661 | static void rmap_desc_remove_entry(unsigned long *rmapp, | 851 | static u64 *pte_list_next(unsigned long *pte_list, u64 *spte) |
662 | struct kvm_rmap_desc *desc, | 852 | { |
663 | int i, | 853 | struct pte_list_desc *desc; |
664 | struct kvm_rmap_desc *prev_desc) | 854 | u64 *prev_spte; |
855 | int i; | ||
856 | |||
857 | if (!*pte_list) | ||
858 | return NULL; | ||
859 | else if (!(*pte_list & 1)) { | ||
860 | if (!spte) | ||
861 | return (u64 *)*pte_list; | ||
862 | return NULL; | ||
863 | } | ||
864 | desc = (struct pte_list_desc *)(*pte_list & ~1ul); | ||
865 | prev_spte = NULL; | ||
866 | while (desc) { | ||
867 | for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { | ||
868 | if (prev_spte == spte) | ||
869 | return desc->sptes[i]; | ||
870 | prev_spte = desc->sptes[i]; | ||
871 | } | ||
872 | desc = desc->more; | ||
873 | } | ||
874 | return NULL; | ||
875 | } | ||
876 | |||
877 | static void | ||
878 | pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, | ||
879 | int i, struct pte_list_desc *prev_desc) | ||
665 | { | 880 | { |
666 | int j; | 881 | int j; |
667 | 882 | ||
668 | for (j = RMAP_EXT - 1; !desc->sptes[j] && j > i; --j) | 883 | for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) |
669 | ; | 884 | ; |
670 | desc->sptes[i] = desc->sptes[j]; | 885 | desc->sptes[i] = desc->sptes[j]; |
671 | desc->sptes[j] = NULL; | 886 | desc->sptes[j] = NULL; |
672 | if (j != 0) | 887 | if (j != 0) |
673 | return; | 888 | return; |
674 | if (!prev_desc && !desc->more) | 889 | if (!prev_desc && !desc->more) |
675 | *rmapp = (unsigned long)desc->sptes[0]; | 890 | *pte_list = (unsigned long)desc->sptes[0]; |
676 | else | 891 | else |
677 | if (prev_desc) | 892 | if (prev_desc) |
678 | prev_desc->more = desc->more; | 893 | prev_desc->more = desc->more; |
679 | else | 894 | else |
680 | *rmapp = (unsigned long)desc->more | 1; | 895 | *pte_list = (unsigned long)desc->more | 1; |
681 | mmu_free_rmap_desc(desc); | 896 | mmu_free_pte_list_desc(desc); |
682 | } | 897 | } |
683 | 898 | ||
684 | static void rmap_remove(struct kvm *kvm, u64 *spte) | 899 | static void pte_list_remove(u64 *spte, unsigned long *pte_list) |
685 | { | 900 | { |
686 | struct kvm_rmap_desc *desc; | 901 | struct pte_list_desc *desc; |
687 | struct kvm_rmap_desc *prev_desc; | 902 | struct pte_list_desc *prev_desc; |
688 | struct kvm_mmu_page *sp; | ||
689 | gfn_t gfn; | ||
690 | unsigned long *rmapp; | ||
691 | int i; | 903 | int i; |
692 | 904 | ||
693 | sp = page_header(__pa(spte)); | 905 | if (!*pte_list) { |
694 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); | 906 | printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); |
695 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); | ||
696 | if (!*rmapp) { | ||
697 | printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte); | ||
698 | BUG(); | 907 | BUG(); |
699 | } else if (!(*rmapp & 1)) { | 908 | } else if (!(*pte_list & 1)) { |
700 | rmap_printk("rmap_remove: %p 1->0\n", spte); | 909 | rmap_printk("pte_list_remove: %p 1->0\n", spte); |
701 | if ((u64 *)*rmapp != spte) { | 910 | if ((u64 *)*pte_list != spte) { |
702 | printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte); | 911 | printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); |
703 | BUG(); | 912 | BUG(); |
704 | } | 913 | } |
705 | *rmapp = 0; | 914 | *pte_list = 0; |
706 | } else { | 915 | } else { |
707 | rmap_printk("rmap_remove: %p many->many\n", spte); | 916 | rmap_printk("pte_list_remove: %p many->many\n", spte); |
708 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 917 | desc = (struct pte_list_desc *)(*pte_list & ~1ul); |
709 | prev_desc = NULL; | 918 | prev_desc = NULL; |
710 | while (desc) { | 919 | while (desc) { |
711 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) | 920 | for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) |
712 | if (desc->sptes[i] == spte) { | 921 | if (desc->sptes[i] == spte) { |
713 | rmap_desc_remove_entry(rmapp, | 922 | pte_list_desc_remove_entry(pte_list, |
714 | desc, i, | 923 | desc, i, |
715 | prev_desc); | 924 | prev_desc); |
716 | return; | 925 | return; |
@@ -718,62 +927,80 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
718 | prev_desc = desc; | 927 | prev_desc = desc; |
719 | desc = desc->more; | 928 | desc = desc->more; |
720 | } | 929 | } |
721 | pr_err("rmap_remove: %p many->many\n", spte); | 930 | pr_err("pte_list_remove: %p many->many\n", spte); |
722 | BUG(); | 931 | BUG(); |
723 | } | 932 | } |
724 | } | 933 | } |
725 | 934 | ||
726 | static int set_spte_track_bits(u64 *sptep, u64 new_spte) | 935 | typedef void (*pte_list_walk_fn) (u64 *spte); |
936 | static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | ||
727 | { | 937 | { |
728 | pfn_t pfn; | 938 | struct pte_list_desc *desc; |
729 | u64 old_spte = *sptep; | 939 | int i; |
730 | 940 | ||
731 | if (!spte_has_volatile_bits(old_spte)) | 941 | if (!*pte_list) |
732 | __set_spte(sptep, new_spte); | 942 | return; |
733 | else | ||
734 | old_spte = __xchg_spte(sptep, new_spte); | ||
735 | 943 | ||
736 | if (!is_rmap_spte(old_spte)) | 944 | if (!(*pte_list & 1)) |
737 | return 0; | 945 | return fn((u64 *)*pte_list); |
738 | 946 | ||
739 | pfn = spte_to_pfn(old_spte); | 947 | desc = (struct pte_list_desc *)(*pte_list & ~1ul); |
740 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | 948 | while (desc) { |
741 | kvm_set_pfn_accessed(pfn); | 949 | for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) |
742 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | 950 | fn(desc->sptes[i]); |
743 | kvm_set_pfn_dirty(pfn); | 951 | desc = desc->more; |
744 | return 1; | 952 | } |
745 | } | 953 | } |
746 | 954 | ||
747 | static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) | 955 | /* |
956 | * Take gfn and return the reverse mapping to it. | ||
957 | */ | ||
958 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | ||
748 | { | 959 | { |
749 | if (set_spte_track_bits(sptep, new_spte)) | 960 | struct kvm_memory_slot *slot; |
750 | rmap_remove(kvm, sptep); | 961 | struct kvm_lpage_info *linfo; |
962 | |||
963 | slot = gfn_to_memslot(kvm, gfn); | ||
964 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | ||
965 | return &slot->rmap[gfn - slot->base_gfn]; | ||
966 | |||
967 | linfo = lpage_info_slot(gfn, slot, level); | ||
968 | |||
969 | return &linfo->rmap_pde; | ||
970 | } | ||
971 | |||
972 | static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | ||
973 | { | ||
974 | struct kvm_mmu_page *sp; | ||
975 | unsigned long *rmapp; | ||
976 | |||
977 | sp = page_header(__pa(spte)); | ||
978 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); | ||
979 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | ||
980 | return pte_list_add(vcpu, spte, rmapp); | ||
751 | } | 981 | } |
752 | 982 | ||
753 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 983 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
754 | { | 984 | { |
755 | struct kvm_rmap_desc *desc; | 985 | return pte_list_next(rmapp, spte); |
756 | u64 *prev_spte; | 986 | } |
757 | int i; | ||
758 | 987 | ||
759 | if (!*rmapp) | 988 | static void rmap_remove(struct kvm *kvm, u64 *spte) |
760 | return NULL; | 989 | { |
761 | else if (!(*rmapp & 1)) { | 990 | struct kvm_mmu_page *sp; |
762 | if (!spte) | 991 | gfn_t gfn; |
763 | return (u64 *)*rmapp; | 992 | unsigned long *rmapp; |
764 | return NULL; | 993 | |
765 | } | 994 | sp = page_header(__pa(spte)); |
766 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 995 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
767 | prev_spte = NULL; | 996 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); |
768 | while (desc) { | 997 | pte_list_remove(spte, rmapp); |
769 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 998 | } |
770 | if (prev_spte == spte) | 999 | |
771 | return desc->sptes[i]; | 1000 | static void drop_spte(struct kvm *kvm, u64 *sptep) |
772 | prev_spte = desc->sptes[i]; | 1001 | { |
773 | } | 1002 | if (mmu_spte_clear_track_bits(sptep)) |
774 | desc = desc->more; | 1003 | rmap_remove(kvm, sptep); |
775 | } | ||
776 | return NULL; | ||
777 | } | 1004 | } |
778 | 1005 | ||
779 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) | 1006 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) |
@@ -790,7 +1017,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
790 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1017 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
791 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1018 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
792 | if (is_writable_pte(*spte)) { | 1019 | if (is_writable_pte(*spte)) { |
793 | update_spte(spte, *spte & ~PT_WRITABLE_MASK); | 1020 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); |
794 | write_protected = 1; | 1021 | write_protected = 1; |
795 | } | 1022 | } |
796 | spte = rmap_next(kvm, rmapp, spte); | 1023 | spte = rmap_next(kvm, rmapp, spte); |
@@ -807,8 +1034,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
807 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 1034 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
808 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 1035 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
809 | if (is_writable_pte(*spte)) { | 1036 | if (is_writable_pte(*spte)) { |
810 | drop_spte(kvm, spte, | 1037 | drop_spte(kvm, spte); |
811 | shadow_trap_nonpresent_pte); | ||
812 | --kvm->stat.lpages; | 1038 | --kvm->stat.lpages; |
813 | spte = NULL; | 1039 | spte = NULL; |
814 | write_protected = 1; | 1040 | write_protected = 1; |
@@ -829,7 +1055,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
829 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 1055 | while ((spte = rmap_next(kvm, rmapp, NULL))) { |
830 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1056 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
831 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 1057 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
832 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); | 1058 | drop_spte(kvm, spte); |
833 | need_tlb_flush = 1; | 1059 | need_tlb_flush = 1; |
834 | } | 1060 | } |
835 | return need_tlb_flush; | 1061 | return need_tlb_flush; |
@@ -851,7 +1077,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
851 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 1077 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
852 | need_flush = 1; | 1078 | need_flush = 1; |
853 | if (pte_write(*ptep)) { | 1079 | if (pte_write(*ptep)) { |
854 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); | 1080 | drop_spte(kvm, spte); |
855 | spte = rmap_next(kvm, rmapp, NULL); | 1081 | spte = rmap_next(kvm, rmapp, NULL); |
856 | } else { | 1082 | } else { |
857 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 1083 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
@@ -860,7 +1086,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
860 | new_spte &= ~PT_WRITABLE_MASK; | 1086 | new_spte &= ~PT_WRITABLE_MASK; |
861 | new_spte &= ~SPTE_HOST_WRITEABLE; | 1087 | new_spte &= ~SPTE_HOST_WRITEABLE; |
862 | new_spte &= ~shadow_accessed_mask; | 1088 | new_spte &= ~shadow_accessed_mask; |
863 | set_spte_track_bits(spte, new_spte); | 1089 | mmu_spte_clear_track_bits(spte); |
1090 | mmu_spte_set(spte, new_spte); | ||
864 | spte = rmap_next(kvm, rmapp, spte); | 1091 | spte = rmap_next(kvm, rmapp, spte); |
865 | } | 1092 | } |
866 | } | 1093 | } |
@@ -1032,151 +1259,89 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | |||
1032 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); | 1259 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); |
1033 | } | 1260 | } |
1034 | 1261 | ||
1035 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1262 | /* |
1263 | * Remove the sp from the shadow page cache; after this call the sp | ||
1264 | * can no longer be found in the cache, but the shadow | ||
1265 | * page table is still valid. | ||
1266 | * It should be called under the protection of the mmu lock. | ||
1267 | */ | ||
1268 | static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp) | ||
1036 | { | 1269 | { |
1037 | ASSERT(is_empty_shadow_page(sp->spt)); | 1270 | ASSERT(is_empty_shadow_page(sp->spt)); |
1038 | hlist_del(&sp->hash_link); | 1271 | hlist_del(&sp->hash_link); |
1039 | list_del(&sp->link); | ||
1040 | free_page((unsigned long)sp->spt); | ||
1041 | if (!sp->role.direct) | 1272 | if (!sp->role.direct) |
1042 | free_page((unsigned long)sp->gfns); | 1273 | free_page((unsigned long)sp->gfns); |
1043 | kmem_cache_free(mmu_page_header_cache, sp); | ||
1044 | kvm_mod_used_mmu_pages(kvm, -1); | ||
1045 | } | 1274 | } |
1046 | 1275 | ||
1047 | static unsigned kvm_page_table_hashfn(gfn_t gfn) | 1276 | /* |
1277 | * Free the shadow page table and the sp; this can be done | ||
1278 | * outside the protection of the mmu lock. | ||
1279 | */ | ||
1280 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | ||
1048 | { | 1281 | { |
1049 | return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); | 1282 | list_del(&sp->link); |
1283 | free_page((unsigned long)sp->spt); | ||
1284 | kmem_cache_free(mmu_page_header_cache, sp); | ||
1050 | } | 1285 | } |
1051 | 1286 | ||
1052 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 1287 | static unsigned kvm_page_table_hashfn(gfn_t gfn) |
1053 | u64 *parent_pte, int direct) | ||
1054 | { | 1288 | { |
1055 | struct kvm_mmu_page *sp; | 1289 | return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); |
1056 | |||
1057 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); | ||
1058 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | ||
1059 | if (!direct) | ||
1060 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | ||
1061 | PAGE_SIZE); | ||
1062 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | ||
1063 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | ||
1064 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | ||
1065 | sp->multimapped = 0; | ||
1066 | sp->parent_pte = parent_pte; | ||
1067 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | ||
1068 | return sp; | ||
1069 | } | 1290 | } |
1070 | 1291 | ||
1071 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, | 1292 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, |
1072 | struct kvm_mmu_page *sp, u64 *parent_pte) | 1293 | struct kvm_mmu_page *sp, u64 *parent_pte) |
1073 | { | 1294 | { |
1074 | struct kvm_pte_chain *pte_chain; | ||
1075 | struct hlist_node *node; | ||
1076 | int i; | ||
1077 | |||
1078 | if (!parent_pte) | 1295 | if (!parent_pte) |
1079 | return; | 1296 | return; |
1080 | if (!sp->multimapped) { | ||
1081 | u64 *old = sp->parent_pte; | ||
1082 | 1297 | ||
1083 | if (!old) { | 1298 | pte_list_add(vcpu, parent_pte, &sp->parent_ptes); |
1084 | sp->parent_pte = parent_pte; | ||
1085 | return; | ||
1086 | } | ||
1087 | sp->multimapped = 1; | ||
1088 | pte_chain = mmu_alloc_pte_chain(vcpu); | ||
1089 | INIT_HLIST_HEAD(&sp->parent_ptes); | ||
1090 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); | ||
1091 | pte_chain->parent_ptes[0] = old; | ||
1092 | } | ||
1093 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) { | ||
1094 | if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) | ||
1095 | continue; | ||
1096 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) | ||
1097 | if (!pte_chain->parent_ptes[i]) { | ||
1098 | pte_chain->parent_ptes[i] = parent_pte; | ||
1099 | return; | ||
1100 | } | ||
1101 | } | ||
1102 | pte_chain = mmu_alloc_pte_chain(vcpu); | ||
1103 | BUG_ON(!pte_chain); | ||
1104 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); | ||
1105 | pte_chain->parent_ptes[0] = parent_pte; | ||
1106 | } | 1299 | } |
1107 | 1300 | ||
1108 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | 1301 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, |
1109 | u64 *parent_pte) | 1302 | u64 *parent_pte) |
1110 | { | 1303 | { |
1111 | struct kvm_pte_chain *pte_chain; | 1304 | pte_list_remove(parent_pte, &sp->parent_ptes); |
1112 | struct hlist_node *node; | ||
1113 | int i; | ||
1114 | |||
1115 | if (!sp->multimapped) { | ||
1116 | BUG_ON(sp->parent_pte != parent_pte); | ||
1117 | sp->parent_pte = NULL; | ||
1118 | return; | ||
1119 | } | ||
1120 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
1121 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
1122 | if (!pte_chain->parent_ptes[i]) | ||
1123 | break; | ||
1124 | if (pte_chain->parent_ptes[i] != parent_pte) | ||
1125 | continue; | ||
1126 | while (i + 1 < NR_PTE_CHAIN_ENTRIES | ||
1127 | && pte_chain->parent_ptes[i + 1]) { | ||
1128 | pte_chain->parent_ptes[i] | ||
1129 | = pte_chain->parent_ptes[i + 1]; | ||
1130 | ++i; | ||
1131 | } | ||
1132 | pte_chain->parent_ptes[i] = NULL; | ||
1133 | if (i == 0) { | ||
1134 | hlist_del(&pte_chain->link); | ||
1135 | mmu_free_pte_chain(pte_chain); | ||
1136 | if (hlist_empty(&sp->parent_ptes)) { | ||
1137 | sp->multimapped = 0; | ||
1138 | sp->parent_pte = NULL; | ||
1139 | } | ||
1140 | } | ||
1141 | return; | ||
1142 | } | ||
1143 | BUG(); | ||
1144 | } | 1305 | } |
1145 | 1306 | ||
1146 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | 1307 | static void drop_parent_pte(struct kvm_mmu_page *sp, |
1308 | u64 *parent_pte) | ||
1147 | { | 1309 | { |
1148 | struct kvm_pte_chain *pte_chain; | 1310 | mmu_page_remove_parent_pte(sp, parent_pte); |
1149 | struct hlist_node *node; | 1311 | mmu_spte_clear_no_track(parent_pte); |
1150 | struct kvm_mmu_page *parent_sp; | 1312 | } |
1151 | int i; | ||
1152 | |||
1153 | if (!sp->multimapped && sp->parent_pte) { | ||
1154 | parent_sp = page_header(__pa(sp->parent_pte)); | ||
1155 | fn(parent_sp, sp->parent_pte); | ||
1156 | return; | ||
1157 | } | ||
1158 | |||
1159 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
1160 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
1161 | u64 *spte = pte_chain->parent_ptes[i]; | ||
1162 | 1313 | ||
1163 | if (!spte) | 1314 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
1164 | break; | 1315 | u64 *parent_pte, int direct) |
1165 | parent_sp = page_header(__pa(spte)); | 1316 | { |
1166 | fn(parent_sp, spte); | 1317 | struct kvm_mmu_page *sp; |
1167 | } | 1318 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, |
1319 | sizeof *sp); | ||
1320 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | ||
1321 | if (!direct) | ||
1322 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | ||
1323 | PAGE_SIZE); | ||
1324 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | ||
1325 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | ||
1326 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | ||
1327 | sp->parent_ptes = 0; | ||
1328 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | ||
1329 | kvm_mod_used_mmu_pages(vcpu->kvm, +1); | ||
1330 | return sp; | ||
1168 | } | 1331 | } |
1169 | 1332 | ||
1170 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); | 1333 | static void mark_unsync(u64 *spte); |
1171 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | 1334 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1172 | { | 1335 | { |
1173 | mmu_parent_walk(sp, mark_unsync); | 1336 | pte_list_walk(&sp->parent_ptes, mark_unsync); |
1174 | } | 1337 | } |
1175 | 1338 | ||
1176 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) | 1339 | static void mark_unsync(u64 *spte) |
1177 | { | 1340 | { |
1341 | struct kvm_mmu_page *sp; | ||
1178 | unsigned int index; | 1342 | unsigned int index; |
1179 | 1343 | ||
1344 | sp = page_header(__pa(spte)); | ||
1180 | index = spte - sp->spt; | 1345 | index = spte - sp->spt; |
1181 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) | 1346 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) |
1182 | return; | 1347 | return; |
@@ -1185,15 +1350,6 @@ static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) | |||
1185 | kvm_mmu_mark_parents_unsync(sp); | 1350 | kvm_mmu_mark_parents_unsync(sp); |
1186 | } | 1351 | } |
1187 | 1352 | ||
1188 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | ||
1189 | struct kvm_mmu_page *sp) | ||
1190 | { | ||
1191 | int i; | ||
1192 | |||
1193 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||
1194 | sp->spt[i] = shadow_trap_nonpresent_pte; | ||
1195 | } | ||
1196 | |||
1197 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1353 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1198 | struct kvm_mmu_page *sp) | 1354 | struct kvm_mmu_page *sp) |
1199 | { | 1355 | { |
@@ -1475,6 +1631,14 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1475 | } | 1631 | } |
1476 | } | 1632 | } |
1477 | 1633 | ||
1634 | static void init_shadow_page_table(struct kvm_mmu_page *sp) | ||
1635 | { | ||
1636 | int i; | ||
1637 | |||
1638 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||
1639 | sp->spt[i] = 0ull; | ||
1640 | } | ||
1641 | |||
1478 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1642 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
1479 | gfn_t gfn, | 1643 | gfn_t gfn, |
1480 | gva_t gaddr, | 1644 | gva_t gaddr, |
@@ -1537,10 +1701,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1537 | 1701 | ||
1538 | account_shadowed(vcpu->kvm, gfn); | 1702 | account_shadowed(vcpu->kvm, gfn); |
1539 | } | 1703 | } |
1540 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1704 | init_shadow_page_table(sp); |
1541 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | ||
1542 | else | ||
1543 | nonpaging_prefetch_page(vcpu, sp); | ||
1544 | trace_kvm_mmu_get_page(sp, true); | 1705 | trace_kvm_mmu_get_page(sp, true); |
1545 | return sp; | 1706 | return sp; |
1546 | } | 1707 | } |
@@ -1572,21 +1733,28 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) | |||
1572 | if (iterator->level < PT_PAGE_TABLE_LEVEL) | 1733 | if (iterator->level < PT_PAGE_TABLE_LEVEL) |
1573 | return false; | 1734 | return false; |
1574 | 1735 | ||
1575 | if (iterator->level == PT_PAGE_TABLE_LEVEL) | ||
1576 | if (is_large_pte(*iterator->sptep)) | ||
1577 | return false; | ||
1578 | |||
1579 | iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); | 1736 | iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); |
1580 | iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; | 1737 | iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; |
1581 | return true; | 1738 | return true; |
1582 | } | 1739 | } |
1583 | 1740 | ||
1584 | static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | 1741 | static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator, |
1742 | u64 spte) | ||
1585 | { | 1743 | { |
1586 | iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK; | 1744 | if (is_last_spte(spte, iterator->level)) { |
1745 | iterator->level = 0; | ||
1746 | return; | ||
1747 | } | ||
1748 | |||
1749 | iterator->shadow_addr = spte & PT64_BASE_ADDR_MASK; | ||
1587 | --iterator->level; | 1750 | --iterator->level; |
1588 | } | 1751 | } |
1589 | 1752 | ||
1753 | static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | ||
1754 | { | ||
1755 | return __shadow_walk_next(iterator, *iterator->sptep); | ||
1756 | } | ||
1757 | |||
1590 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | 1758 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) |
1591 | { | 1759 | { |
1592 | u64 spte; | 1760 | u64 spte; |
@@ -1594,13 +1762,13 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | |||
1594 | spte = __pa(sp->spt) | 1762 | spte = __pa(sp->spt) |
1595 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 1763 | | PT_PRESENT_MASK | PT_ACCESSED_MASK |
1596 | | PT_WRITABLE_MASK | PT_USER_MASK; | 1764 | | PT_WRITABLE_MASK | PT_USER_MASK; |
1597 | __set_spte(sptep, spte); | 1765 | mmu_spte_set(sptep, spte); |
1598 | } | 1766 | } |
1599 | 1767 | ||
1600 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | 1768 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) |
1601 | { | 1769 | { |
1602 | if (is_large_pte(*sptep)) { | 1770 | if (is_large_pte(*sptep)) { |
1603 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 1771 | drop_spte(vcpu->kvm, sptep); |
1604 | kvm_flush_remote_tlbs(vcpu->kvm); | 1772 | kvm_flush_remote_tlbs(vcpu->kvm); |
1605 | } | 1773 | } |
1606 | } | 1774 | } |
@@ -1622,38 +1790,39 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1622 | if (child->role.access == direct_access) | 1790 | if (child->role.access == direct_access) |
1623 | return; | 1791 | return; |
1624 | 1792 | ||
1625 | mmu_page_remove_parent_pte(child, sptep); | 1793 | drop_parent_pte(child, sptep); |
1626 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
1627 | kvm_flush_remote_tlbs(vcpu->kvm); | 1794 | kvm_flush_remote_tlbs(vcpu->kvm); |
1628 | } | 1795 | } |
1629 | } | 1796 | } |
1630 | 1797 | ||
1798 | static void mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, | ||
1799 | u64 *spte) | ||
1800 | { | ||
1801 | u64 pte; | ||
1802 | struct kvm_mmu_page *child; | ||
1803 | |||
1804 | pte = *spte; | ||
1805 | if (is_shadow_present_pte(pte)) { | ||
1806 | if (is_last_spte(pte, sp->role.level)) | ||
1807 | drop_spte(kvm, spte); | ||
1808 | else { | ||
1809 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
1810 | drop_parent_pte(child, spte); | ||
1811 | } | ||
1812 | } else if (is_mmio_spte(pte)) | ||
1813 | mmu_spte_clear_no_track(spte); | ||
1814 | |||
1815 | if (is_large_pte(pte)) | ||
1816 | --kvm->stat.lpages; | ||
1817 | } | ||
1818 | |||
1631 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1819 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
1632 | struct kvm_mmu_page *sp) | 1820 | struct kvm_mmu_page *sp) |
1633 | { | 1821 | { |
1634 | unsigned i; | 1822 | unsigned i; |
1635 | u64 *pt; | 1823 | |
1636 | u64 ent; | 1824 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
1637 | 1825 | mmu_page_zap_pte(kvm, sp, sp->spt + i); | |
1638 | pt = sp->spt; | ||
1639 | |||
1640 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
1641 | ent = pt[i]; | ||
1642 | |||
1643 | if (is_shadow_present_pte(ent)) { | ||
1644 | if (!is_last_spte(ent, sp->role.level)) { | ||
1645 | ent &= PT64_BASE_ADDR_MASK; | ||
1646 | mmu_page_remove_parent_pte(page_header(ent), | ||
1647 | &pt[i]); | ||
1648 | } else { | ||
1649 | if (is_large_pte(ent)) | ||
1650 | --kvm->stat.lpages; | ||
1651 | drop_spte(kvm, &pt[i], | ||
1652 | shadow_trap_nonpresent_pte); | ||
1653 | } | ||
1654 | } | ||
1655 | pt[i] = shadow_trap_nonpresent_pte; | ||
1656 | } | ||
1657 | } | 1826 | } |
1658 | 1827 | ||
1659 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | 1828 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) |
@@ -1674,20 +1843,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1674 | { | 1843 | { |
1675 | u64 *parent_pte; | 1844 | u64 *parent_pte; |
1676 | 1845 | ||
1677 | while (sp->multimapped || sp->parent_pte) { | 1846 | while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL))) |
1678 | if (!sp->multimapped) | 1847 | drop_parent_pte(sp, parent_pte); |
1679 | parent_pte = sp->parent_pte; | ||
1680 | else { | ||
1681 | struct kvm_pte_chain *chain; | ||
1682 | |||
1683 | chain = container_of(sp->parent_ptes.first, | ||
1684 | struct kvm_pte_chain, link); | ||
1685 | parent_pte = chain->parent_ptes[0]; | ||
1686 | } | ||
1687 | BUG_ON(!parent_pte); | ||
1688 | kvm_mmu_put_page(sp, parent_pte); | ||
1689 | __set_spte(parent_pte, shadow_trap_nonpresent_pte); | ||
1690 | } | ||
1691 | } | 1848 | } |
1692 | 1849 | ||
1693 | static int mmu_zap_unsync_children(struct kvm *kvm, | 1850 | static int mmu_zap_unsync_children(struct kvm *kvm, |
@@ -1734,6 +1891,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1734 | /* Count self */ | 1891 | /* Count self */ |
1735 | ret++; | 1892 | ret++; |
1736 | list_move(&sp->link, invalid_list); | 1893 | list_move(&sp->link, invalid_list); |
1894 | kvm_mod_used_mmu_pages(kvm, -1); | ||
1737 | } else { | 1895 | } else { |
1738 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1896 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
1739 | kvm_reload_remote_mmus(kvm); | 1897 | kvm_reload_remote_mmus(kvm); |
@@ -1744,6 +1902,30 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1744 | return ret; | 1902 | return ret; |
1745 | } | 1903 | } |
1746 | 1904 | ||
1905 | static void kvm_mmu_isolate_pages(struct list_head *invalid_list) | ||
1906 | { | ||
1907 | struct kvm_mmu_page *sp; | ||
1908 | |||
1909 | list_for_each_entry(sp, invalid_list, link) | ||
1910 | kvm_mmu_isolate_page(sp); | ||
1911 | } | ||
1912 | |||
1913 | static void free_pages_rcu(struct rcu_head *head) | ||
1914 | { | ||
1915 | struct kvm_mmu_page *next, *sp; | ||
1916 | |||
1917 | sp = container_of(head, struct kvm_mmu_page, rcu); | ||
1918 | while (sp) { | ||
1919 | if (!list_empty(&sp->link)) | ||
1920 | next = list_first_entry(&sp->link, | ||
1921 | struct kvm_mmu_page, link); | ||
1922 | else | ||
1923 | next = NULL; | ||
1924 | kvm_mmu_free_page(sp); | ||
1925 | sp = next; | ||
1926 | } | ||
1927 | } | ||
1928 | |||
1747 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1929 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1748 | struct list_head *invalid_list) | 1930 | struct list_head *invalid_list) |
1749 | { | 1931 | { |
@@ -1754,10 +1936,21 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
1754 | 1936 | ||
1755 | kvm_flush_remote_tlbs(kvm); | 1937 | kvm_flush_remote_tlbs(kvm); |
1756 | 1938 | ||
1939 | if (atomic_read(&kvm->arch.reader_counter)) { | ||
1940 | kvm_mmu_isolate_pages(invalid_list); | ||
1941 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
1942 | list_del_init(invalid_list); | ||
1943 | |||
1944 | trace_kvm_mmu_delay_free_pages(sp); | ||
1945 | call_rcu(&sp->rcu, free_pages_rcu); | ||
1946 | return; | ||
1947 | } | ||
1948 | |||
1757 | do { | 1949 | do { |
1758 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 1950 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
1759 | WARN_ON(!sp->role.invalid || sp->root_count); | 1951 | WARN_ON(!sp->role.invalid || sp->root_count); |
1760 | kvm_mmu_free_page(kvm, sp); | 1952 | kvm_mmu_isolate_page(sp); |
1953 | kvm_mmu_free_page(sp); | ||
1761 | } while (!list_empty(invalid_list)); | 1954 | } while (!list_empty(invalid_list)); |
1762 | 1955 | ||
1763 | } | 1956 | } |
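Side note, not part of the patch: the reader_counter branch added to kvm_mmu_commit_zap_page() above defers the actual free to an RCU callback, so a lockless shadow-page walker that raced with the zap never dereferences a freed page. A minimal sketch of the same call_rcu() pattern, using hypothetical demo_* names rather than the KVM structures:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_page {
        struct rcu_head rcu;            /* embedded callback head, like kvm_mmu_page */
        void *payload;
};

/* Runs only after a grace period: no lockless reader can still hold a pointer. */
static void demo_free_rcu(struct rcu_head *head)
{
        struct demo_page *p = container_of(head, struct demo_page, rcu);

        kfree(p->payload);
        kfree(p);
}

static void demo_retire_page(struct demo_page *p, bool lockless_readers_active)
{
        if (lockless_readers_active)
                call_rcu(&p->rcu, demo_free_rcu);       /* defer, as the patch does */
        else
                demo_free_rcu(&p->rcu);                 /* no readers: free immediately */
}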
@@ -1783,8 +1976,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | |||
1783 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1976 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1784 | struct kvm_mmu_page, link); | 1977 | struct kvm_mmu_page, link); |
1785 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); | 1978 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); |
1786 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1787 | } | 1979 | } |
1980 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1788 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; | 1981 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; |
1789 | } | 1982 | } |
1790 | 1983 | ||
@@ -1833,20 +2026,6 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
1833 | __set_bit(slot, sp->slot_bitmap); | 2026 | __set_bit(slot, sp->slot_bitmap); |
1834 | } | 2027 | } |
1835 | 2028 | ||
1836 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | ||
1837 | { | ||
1838 | int i; | ||
1839 | u64 *pt = sp->spt; | ||
1840 | |||
1841 | if (shadow_trap_nonpresent_pte == shadow_notrap_nonpresent_pte) | ||
1842 | return; | ||
1843 | |||
1844 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
1845 | if (pt[i] == shadow_notrap_nonpresent_pte) | ||
1846 | __set_spte(&pt[i], shadow_trap_nonpresent_pte); | ||
1847 | } | ||
1848 | } | ||
1849 | |||
1850 | /* | 2029 | /* |
1851 | * The function is based on mtrr_type_lookup() in | 2030 | * The function is based on mtrr_type_lookup() in |
1852 | * arch/x86/kernel/cpu/mtrr/generic.c | 2031 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -1959,7 +2138,6 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1959 | sp->unsync = 1; | 2138 | sp->unsync = 1; |
1960 | 2139 | ||
1961 | kvm_mmu_mark_parents_unsync(sp); | 2140 | kvm_mmu_mark_parents_unsync(sp); |
1962 | mmu_convert_notrap(sp); | ||
1963 | } | 2141 | } |
1964 | 2142 | ||
1965 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 2143 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) |
@@ -2002,13 +2180,16 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2002 | 2180 | ||
2003 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2181 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
2004 | unsigned pte_access, int user_fault, | 2182 | unsigned pte_access, int user_fault, |
2005 | int write_fault, int dirty, int level, | 2183 | int write_fault, int level, |
2006 | gfn_t gfn, pfn_t pfn, bool speculative, | 2184 | gfn_t gfn, pfn_t pfn, bool speculative, |
2007 | bool can_unsync, bool host_writable) | 2185 | bool can_unsync, bool host_writable) |
2008 | { | 2186 | { |
2009 | u64 spte, entry = *sptep; | 2187 | u64 spte, entry = *sptep; |
2010 | int ret = 0; | 2188 | int ret = 0; |
2011 | 2189 | ||
2190 | if (set_mmio_spte(sptep, gfn, pfn, pte_access)) | ||
2191 | return 0; | ||
2192 | |||
2012 | /* | 2193 | /* |
2013 | * We don't set the accessed bit, since we sometimes want to see | 2194 | * We don't set the accessed bit, since we sometimes want to see |
2014 | * whether the guest actually used the pte (in order to detect | 2195 | * whether the guest actually used the pte (in order to detect |
@@ -2017,8 +2198,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2017 | spte = PT_PRESENT_MASK; | 2198 | spte = PT_PRESENT_MASK; |
2018 | if (!speculative) | 2199 | if (!speculative) |
2019 | spte |= shadow_accessed_mask; | 2200 | spte |= shadow_accessed_mask; |
2020 | if (!dirty) | 2201 | |
2021 | pte_access &= ~ACC_WRITE_MASK; | ||
2022 | if (pte_access & ACC_EXEC_MASK) | 2202 | if (pte_access & ACC_EXEC_MASK) |
2023 | spte |= shadow_x_mask; | 2203 | spte |= shadow_x_mask; |
2024 | else | 2204 | else |
@@ -2045,15 +2225,24 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2045 | if (level > PT_PAGE_TABLE_LEVEL && | 2225 | if (level > PT_PAGE_TABLE_LEVEL && |
2046 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 2226 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
2047 | ret = 1; | 2227 | ret = 1; |
2048 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 2228 | drop_spte(vcpu->kvm, sptep); |
2049 | goto done; | 2229 | goto done; |
2050 | } | 2230 | } |
2051 | 2231 | ||
2052 | spte |= PT_WRITABLE_MASK; | 2232 | spte |= PT_WRITABLE_MASK; |
2053 | 2233 | ||
2054 | if (!vcpu->arch.mmu.direct_map | 2234 | if (!vcpu->arch.mmu.direct_map |
2055 | && !(pte_access & ACC_WRITE_MASK)) | 2235 | && !(pte_access & ACC_WRITE_MASK)) { |
2056 | spte &= ~PT_USER_MASK; | 2236 | spte &= ~PT_USER_MASK; |
2237 | /* | ||
2238 | * If we converted a user page to a kernel page so that | ||
2239 | * the kernel can write to it when cr0.wp=0, then we | ||
2240 | * should prevent the kernel from executing it if | ||
2241 | * SMEP is enabled. | ||
2242 | */ | ||
2243 | if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
2244 | spte |= PT64_NX_MASK; | ||
2245 | } | ||
2057 | 2246 | ||
2058 | /* | 2247 | /* |
2059 | * Optimization: for pte sync, if spte was writable the hash | 2248 | * Optimization: for pte sync, if spte was writable the hash |
@@ -2078,7 +2267,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2078 | mark_page_dirty(vcpu->kvm, gfn); | 2267 | mark_page_dirty(vcpu->kvm, gfn); |
2079 | 2268 | ||
2080 | set_pte: | 2269 | set_pte: |
2081 | update_spte(sptep, spte); | 2270 | mmu_spte_update(sptep, spte); |
2082 | /* | 2271 | /* |
2083 | * If we overwrite a writable spte with a read-only one we | 2272 | * If we overwrite a writable spte with a read-only one we |
2084 | * should flush remote TLBs. Otherwise rmap_write_protect | 2273 | * should flush remote TLBs. Otherwise rmap_write_protect |
@@ -2093,8 +2282,8 @@ done: | |||
2093 | 2282 | ||
2094 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 2283 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
2095 | unsigned pt_access, unsigned pte_access, | 2284 | unsigned pt_access, unsigned pte_access, |
2096 | int user_fault, int write_fault, int dirty, | 2285 | int user_fault, int write_fault, |
2097 | int *ptwrite, int level, gfn_t gfn, | 2286 | int *emulate, int level, gfn_t gfn, |
2098 | pfn_t pfn, bool speculative, | 2287 | pfn_t pfn, bool speculative, |
2099 | bool host_writable) | 2288 | bool host_writable) |
2100 | { | 2289 | { |
@@ -2117,26 +2306,28 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2117 | u64 pte = *sptep; | 2306 | u64 pte = *sptep; |
2118 | 2307 | ||
2119 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 2308 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
2120 | mmu_page_remove_parent_pte(child, sptep); | 2309 | drop_parent_pte(child, sptep); |
2121 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
2122 | kvm_flush_remote_tlbs(vcpu->kvm); | 2310 | kvm_flush_remote_tlbs(vcpu->kvm); |
2123 | } else if (pfn != spte_to_pfn(*sptep)) { | 2311 | } else if (pfn != spte_to_pfn(*sptep)) { |
2124 | pgprintk("hfn old %llx new %llx\n", | 2312 | pgprintk("hfn old %llx new %llx\n", |
2125 | spte_to_pfn(*sptep), pfn); | 2313 | spte_to_pfn(*sptep), pfn); |
2126 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 2314 | drop_spte(vcpu->kvm, sptep); |
2127 | kvm_flush_remote_tlbs(vcpu->kvm); | 2315 | kvm_flush_remote_tlbs(vcpu->kvm); |
2128 | } else | 2316 | } else |
2129 | was_rmapped = 1; | 2317 | was_rmapped = 1; |
2130 | } | 2318 | } |
2131 | 2319 | ||
2132 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 2320 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
2133 | dirty, level, gfn, pfn, speculative, true, | 2321 | level, gfn, pfn, speculative, true, |
2134 | host_writable)) { | 2322 | host_writable)) { |
2135 | if (write_fault) | 2323 | if (write_fault) |
2136 | *ptwrite = 1; | 2324 | *emulate = 1; |
2137 | kvm_mmu_flush_tlb(vcpu); | 2325 | kvm_mmu_flush_tlb(vcpu); |
2138 | } | 2326 | } |
2139 | 2327 | ||
2328 | if (unlikely(is_mmio_spte(*sptep) && emulate)) | ||
2329 | *emulate = 1; | ||
2330 | |||
2140 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2331 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
2141 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", | 2332 | pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", |
2142 | is_large_pte(*sptep)? "2MB" : "4kB", | 2333 | is_large_pte(*sptep)? "2MB" : "4kB", |
@@ -2145,11 +2336,13 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
2145 | if (!was_rmapped && is_large_pte(*sptep)) | 2336 | if (!was_rmapped && is_large_pte(*sptep)) |
2146 | ++vcpu->kvm->stat.lpages; | 2337 | ++vcpu->kvm->stat.lpages; |
2147 | 2338 | ||
2148 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 2339 | if (is_shadow_present_pte(*sptep)) { |
2149 | if (!was_rmapped) { | 2340 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
2150 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2341 | if (!was_rmapped) { |
2151 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2342 | rmap_count = rmap_add(vcpu, sptep, gfn); |
2152 | rmap_recycle(vcpu, sptep, gfn); | 2343 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
2344 | rmap_recycle(vcpu, sptep, gfn); | ||
2345 | } | ||
2153 | } | 2346 | } |
2154 | kvm_release_pfn_clean(pfn); | 2347 | kvm_release_pfn_clean(pfn); |
2155 | if (speculative) { | 2348 | if (speculative) { |
@@ -2170,8 +2363,8 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2170 | 2363 | ||
2171 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); | 2364 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
2172 | if (!slot) { | 2365 | if (!slot) { |
2173 | get_page(bad_page); | 2366 | get_page(fault_page); |
2174 | return page_to_pfn(bad_page); | 2367 | return page_to_pfn(fault_page); |
2175 | } | 2368 | } |
2176 | 2369 | ||
2177 | hva = gfn_to_hva_memslot(slot, gfn); | 2370 | hva = gfn_to_hva_memslot(slot, gfn); |
@@ -2198,7 +2391,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2198 | 2391 | ||
2199 | for (i = 0; i < ret; i++, gfn++, start++) | 2392 | for (i = 0; i < ret; i++, gfn++, start++) |
2200 | mmu_set_spte(vcpu, start, ACC_ALL, | 2393 | mmu_set_spte(vcpu, start, ACC_ALL, |
2201 | access, 0, 0, 1, NULL, | 2394 | access, 0, 0, NULL, |
2202 | sp->role.level, gfn, | 2395 | sp->role.level, gfn, |
2203 | page_to_pfn(pages[i]), true, true); | 2396 | page_to_pfn(pages[i]), true, true); |
2204 | 2397 | ||
@@ -2217,7 +2410,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, | |||
2217 | spte = sp->spt + i; | 2410 | spte = sp->spt + i; |
2218 | 2411 | ||
2219 | for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { | 2412 | for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { |
2220 | if (*spte != shadow_trap_nonpresent_pte || spte == sptep) { | 2413 | if (is_shadow_present_pte(*spte) || spte == sptep) { |
2221 | if (!start) | 2414 | if (!start) |
2222 | continue; | 2415 | continue; |
2223 | if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) | 2416 | if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) |
@@ -2254,7 +2447,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2254 | { | 2447 | { |
2255 | struct kvm_shadow_walk_iterator iterator; | 2448 | struct kvm_shadow_walk_iterator iterator; |
2256 | struct kvm_mmu_page *sp; | 2449 | struct kvm_mmu_page *sp; |
2257 | int pt_write = 0; | 2450 | int emulate = 0; |
2258 | gfn_t pseudo_gfn; | 2451 | gfn_t pseudo_gfn; |
2259 | 2452 | ||
2260 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2453 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
@@ -2262,14 +2455,14 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2262 | unsigned pte_access = ACC_ALL; | 2455 | unsigned pte_access = ACC_ALL; |
2263 | 2456 | ||
2264 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, | 2457 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, |
2265 | 0, write, 1, &pt_write, | 2458 | 0, write, &emulate, |
2266 | level, gfn, pfn, prefault, map_writable); | 2459 | level, gfn, pfn, prefault, map_writable); |
2267 | direct_pte_prefetch(vcpu, iterator.sptep); | 2460 | direct_pte_prefetch(vcpu, iterator.sptep); |
2268 | ++vcpu->stat.pf_fixed; | 2461 | ++vcpu->stat.pf_fixed; |
2269 | break; | 2462 | break; |
2270 | } | 2463 | } |
2271 | 2464 | ||
2272 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { | 2465 | if (!is_shadow_present_pte(*iterator.sptep)) { |
2273 | u64 base_addr = iterator.addr; | 2466 | u64 base_addr = iterator.addr; |
2274 | 2467 | ||
2275 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | 2468 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); |
@@ -2283,14 +2476,14 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2283 | return -ENOMEM; | 2476 | return -ENOMEM; |
2284 | } | 2477 | } |
2285 | 2478 | ||
2286 | __set_spte(iterator.sptep, | 2479 | mmu_spte_set(iterator.sptep, |
2287 | __pa(sp->spt) | 2480 | __pa(sp->spt) |
2288 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | 2481 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
2289 | | shadow_user_mask | shadow_x_mask | 2482 | | shadow_user_mask | shadow_x_mask |
2290 | | shadow_accessed_mask); | 2483 | | shadow_accessed_mask); |
2291 | } | 2484 | } |
2292 | } | 2485 | } |
2293 | return pt_write; | 2486 | return emulate; |
2294 | } | 2487 | } |
2295 | 2488 | ||
2296 | static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) | 2489 | static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) |
@@ -2306,16 +2499,15 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * | |||
2306 | send_sig_info(SIGBUS, &info, tsk); | 2499 | send_sig_info(SIGBUS, &info, tsk); |
2307 | } | 2500 | } |
2308 | 2501 | ||
2309 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | 2502 | static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) |
2310 | { | 2503 | { |
2311 | kvm_release_pfn_clean(pfn); | 2504 | kvm_release_pfn_clean(pfn); |
2312 | if (is_hwpoison_pfn(pfn)) { | 2505 | if (is_hwpoison_pfn(pfn)) { |
2313 | kvm_send_hwpoison_signal(gfn_to_hva(kvm, gfn), current); | 2506 | kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); |
2314 | return 0; | 2507 | return 0; |
2315 | } else if (is_fault_pfn(pfn)) | 2508 | } |
2316 | return -EFAULT; | ||
2317 | 2509 | ||
2318 | return 1; | 2510 | return -EFAULT; |
2319 | } | 2511 | } |
2320 | 2512 | ||
2321 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | 2513 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
@@ -2360,6 +2552,30 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | |||
2360 | } | 2552 | } |
2361 | } | 2553 | } |
2362 | 2554 | ||
2555 | static bool mmu_invalid_pfn(pfn_t pfn) | ||
2556 | { | ||
2557 | return unlikely(is_invalid_pfn(pfn)); | ||
2558 | } | ||
2559 | |||
2560 | static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, | ||
2561 | pfn_t pfn, unsigned access, int *ret_val) | ||
2562 | { | ||
2563 | bool ret = true; | ||
2564 | |||
2565 | /* The pfn is invalid, report the error! */ | ||
2566 | if (unlikely(is_invalid_pfn(pfn))) { | ||
2567 | *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); | ||
2568 | goto exit; | ||
2569 | } | ||
2570 | |||
2571 | if (unlikely(is_noslot_pfn(pfn))) | ||
2572 | vcpu_cache_mmio_info(vcpu, gva, gfn, access); | ||
2573 | |||
2574 | ret = false; | ||
2575 | exit: | ||
2576 | return ret; | ||
2577 | } | ||
2578 | |||
2363 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2579 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2364 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2580 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2365 | 2581 | ||
@@ -2394,9 +2610,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | |||
2394 | if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) | 2610 | if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) |
2395 | return 0; | 2611 | return 0; |
2396 | 2612 | ||
2397 | /* mmio */ | 2613 | if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) |
2398 | if (is_error_pfn(pfn)) | 2614 | return r; |
2399 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | ||
2400 | 2615 | ||
2401 | spin_lock(&vcpu->kvm->mmu_lock); | 2616 | spin_lock(&vcpu->kvm->mmu_lock); |
2402 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2617 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -2623,6 +2838,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2623 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2838 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2624 | return; | 2839 | return; |
2625 | 2840 | ||
2841 | vcpu_clear_mmio_info(vcpu, ~0ul); | ||
2626 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 2842 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
2627 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 2843 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { |
2628 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2844 | hpa_t root = vcpu->arch.mmu.root_hpa; |
@@ -2667,6 +2883,94 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
2667 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); | 2883 | return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); |
2668 | } | 2884 | } |
2669 | 2885 | ||
2886 | static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) | ||
2887 | { | ||
2888 | if (direct) | ||
2889 | return vcpu_match_mmio_gpa(vcpu, addr); | ||
2890 | |||
2891 | return vcpu_match_mmio_gva(vcpu, addr); | ||
2892 | } | ||
2893 | |||
2894 | |||
2895 | /* | ||
2896 | * On direct hosts, the last spte can only be in two states | ||
2897 | * for an mmio page fault: | ||
2898 | * - It is the mmio spte | ||
2899 | * - It is zapped or it is being zapped. | ||
2900 | * | ||
2901 | * This function completely checks the spte when the last spte | ||
2902 | * is not the mmio spte. | ||
2903 | */ | ||
2904 | static bool check_direct_spte_mmio_pf(u64 spte) | ||
2905 | { | ||
2906 | return __check_direct_spte_mmio_pf(spte); | ||
2907 | } | ||
2908 | |||
2909 | static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) | ||
2910 | { | ||
2911 | struct kvm_shadow_walk_iterator iterator; | ||
2912 | u64 spte = 0ull; | ||
2913 | |||
2914 | walk_shadow_page_lockless_begin(vcpu); | ||
2915 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) | ||
2916 | if (!is_shadow_present_pte(spte)) | ||
2917 | break; | ||
2918 | walk_shadow_page_lockless_end(vcpu); | ||
2919 | |||
2920 | return spte; | ||
2921 | } | ||
2922 | |||
2923 | /* | ||
2924 | * If it is a real mmio page fault, return 1 and emulate the instruction | ||
2925 | * directly; return 0 to let the CPU fault again on the address; -1 is | ||
2926 | * returned if a bug is detected. | ||
2927 | */ | ||
2928 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) | ||
2929 | { | ||
2930 | u64 spte; | ||
2931 | |||
2932 | if (quickly_check_mmio_pf(vcpu, addr, direct)) | ||
2933 | return 1; | ||
2934 | |||
2935 | spte = walk_shadow_page_get_mmio_spte(vcpu, addr); | ||
2936 | |||
2937 | if (is_mmio_spte(spte)) { | ||
2938 | gfn_t gfn = get_mmio_spte_gfn(spte); | ||
2939 | unsigned access = get_mmio_spte_access(spte); | ||
2940 | |||
2941 | if (direct) | ||
2942 | addr = 0; | ||
2943 | |||
2944 | trace_handle_mmio_page_fault(addr, gfn, access); | ||
2945 | vcpu_cache_mmio_info(vcpu, addr, gfn, access); | ||
2946 | return 1; | ||
2947 | } | ||
2948 | |||
2949 | /* | ||
2950 | * It's ok if the gva is remapped by other cpus on a shadow guest, | ||
2951 | * but it's a BUG if the gfn is not an mmio page. | ||
2952 | */ | ||
2953 | if (direct && !check_direct_spte_mmio_pf(spte)) | ||
2954 | return -1; | ||
2955 | |||
2956 | /* | ||
2957 | * If the page table is zapped by other cpus, let CPU fault again on | ||
2958 | * the address. | ||
2959 | */ | ||
2960 | return 0; | ||
2961 | } | ||
2962 | EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); | ||
2963 | |||
2964 | static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, | ||
2965 | u32 error_code, bool direct) | ||
2966 | { | ||
2967 | int ret; | ||
2968 | |||
2969 | ret = handle_mmio_page_fault_common(vcpu, addr, direct); | ||
2970 | WARN_ON(ret < 0); | ||
2971 | return ret; | ||
2972 | } | ||
2973 | |||
2670 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 2974 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
2671 | u32 error_code, bool prefault) | 2975 | u32 error_code, bool prefault) |
2672 | { | 2976 | { |
@@ -2674,6 +2978,10 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | |||
2674 | int r; | 2978 | int r; |
2675 | 2979 | ||
2676 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); | 2980 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
2981 | |||
2982 | if (unlikely(error_code & PFERR_RSVD_MASK)) | ||
2983 | return handle_mmio_page_fault(vcpu, gva, error_code, true); | ||
2984 | |||
2677 | r = mmu_topup_memory_caches(vcpu); | 2985 | r = mmu_topup_memory_caches(vcpu); |
2678 | if (r) | 2986 | if (r) |
2679 | return r; | 2987 | return r; |
@@ -2750,6 +3058,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
2750 | ASSERT(vcpu); | 3058 | ASSERT(vcpu); |
2751 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3059 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2752 | 3060 | ||
3061 | if (unlikely(error_code & PFERR_RSVD_MASK)) | ||
3062 | return handle_mmio_page_fault(vcpu, gpa, error_code, true); | ||
3063 | |||
2753 | r = mmu_topup_memory_caches(vcpu); | 3064 | r = mmu_topup_memory_caches(vcpu); |
2754 | if (r) | 3065 | if (r) |
2755 | return r; | 3066 | return r; |
@@ -2767,9 +3078,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
2767 | if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) | 3078 | if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) |
2768 | return 0; | 3079 | return 0; |
2769 | 3080 | ||
2770 | /* mmio */ | 3081 | if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) |
2771 | if (is_error_pfn(pfn)) | 3082 | return r; |
2772 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | 3083 | |
2773 | spin_lock(&vcpu->kvm->mmu_lock); | 3084 | spin_lock(&vcpu->kvm->mmu_lock); |
2774 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 3085 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2775 | goto out_unlock; | 3086 | goto out_unlock; |
@@ -2800,7 +3111,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, | |||
2800 | context->page_fault = nonpaging_page_fault; | 3111 | context->page_fault = nonpaging_page_fault; |
2801 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 3112 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
2802 | context->free = nonpaging_free; | 3113 | context->free = nonpaging_free; |
2803 | context->prefetch_page = nonpaging_prefetch_page; | ||
2804 | context->sync_page = nonpaging_sync_page; | 3114 | context->sync_page = nonpaging_sync_page; |
2805 | context->invlpg = nonpaging_invlpg; | 3115 | context->invlpg = nonpaging_invlpg; |
2806 | context->update_pte = nonpaging_update_pte; | 3116 | context->update_pte = nonpaging_update_pte; |
@@ -2848,6 +3158,23 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) | |||
2848 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; | 3158 | return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; |
2849 | } | 3159 | } |
2850 | 3160 | ||
3161 | static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | ||
3162 | int *nr_present) | ||
3163 | { | ||
3164 | if (unlikely(is_mmio_spte(*sptep))) { | ||
3165 | if (gfn != get_mmio_spte_gfn(*sptep)) { | ||
3166 | mmu_spte_clear_no_track(sptep); | ||
3167 | return true; | ||
3168 | } | ||
3169 | |||
3170 | (*nr_present)++; | ||
3171 | mark_mmio_spte(sptep, gfn, access); | ||
3172 | return true; | ||
3173 | } | ||
3174 | |||
3175 | return false; | ||
3176 | } | ||
3177 | |||
2851 | #define PTTYPE 64 | 3178 | #define PTTYPE 64 |
2852 | #include "paging_tmpl.h" | 3179 | #include "paging_tmpl.h" |
2853 | #undef PTTYPE | 3180 | #undef PTTYPE |
@@ -2930,7 +3257,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
2930 | context->new_cr3 = paging_new_cr3; | 3257 | context->new_cr3 = paging_new_cr3; |
2931 | context->page_fault = paging64_page_fault; | 3258 | context->page_fault = paging64_page_fault; |
2932 | context->gva_to_gpa = paging64_gva_to_gpa; | 3259 | context->gva_to_gpa = paging64_gva_to_gpa; |
2933 | context->prefetch_page = paging64_prefetch_page; | ||
2934 | context->sync_page = paging64_sync_page; | 3260 | context->sync_page = paging64_sync_page; |
2935 | context->invlpg = paging64_invlpg; | 3261 | context->invlpg = paging64_invlpg; |
2936 | context->update_pte = paging64_update_pte; | 3262 | context->update_pte = paging64_update_pte; |
@@ -2959,7 +3285,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
2959 | context->page_fault = paging32_page_fault; | 3285 | context->page_fault = paging32_page_fault; |
2960 | context->gva_to_gpa = paging32_gva_to_gpa; | 3286 | context->gva_to_gpa = paging32_gva_to_gpa; |
2961 | context->free = paging_free; | 3287 | context->free = paging_free; |
2962 | context->prefetch_page = paging32_prefetch_page; | ||
2963 | context->sync_page = paging32_sync_page; | 3288 | context->sync_page = paging32_sync_page; |
2964 | context->invlpg = paging32_invlpg; | 3289 | context->invlpg = paging32_invlpg; |
2965 | context->update_pte = paging32_update_pte; | 3290 | context->update_pte = paging32_update_pte; |
@@ -2984,7 +3309,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2984 | context->new_cr3 = nonpaging_new_cr3; | 3309 | context->new_cr3 = nonpaging_new_cr3; |
2985 | context->page_fault = tdp_page_fault; | 3310 | context->page_fault = tdp_page_fault; |
2986 | context->free = nonpaging_free; | 3311 | context->free = nonpaging_free; |
2987 | context->prefetch_page = nonpaging_prefetch_page; | ||
2988 | context->sync_page = nonpaging_sync_page; | 3312 | context->sync_page = nonpaging_sync_page; |
2989 | context->invlpg = nonpaging_invlpg; | 3313 | context->invlpg = nonpaging_invlpg; |
2990 | context->update_pte = nonpaging_update_pte; | 3314 | context->update_pte = nonpaging_update_pte; |
@@ -3023,6 +3347,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3023 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | 3347 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) |
3024 | { | 3348 | { |
3025 | int r; | 3349 | int r; |
3350 | bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | ||
3026 | ASSERT(vcpu); | 3351 | ASSERT(vcpu); |
3027 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3352 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3028 | 3353 | ||
@@ -3037,6 +3362,8 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3037 | 3362 | ||
3038 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 3363 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
3039 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 3364 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
3365 | vcpu->arch.mmu.base_role.smep_andnot_wp | ||
3366 | = smep && !is_write_protection(vcpu); | ||
3040 | 3367 | ||
3041 | return r; | 3368 | return r; |
3042 | } | 3369 | } |
@@ -3141,27 +3468,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu) | |||
3141 | } | 3468 | } |
3142 | EXPORT_SYMBOL_GPL(kvm_mmu_unload); | 3469 | EXPORT_SYMBOL_GPL(kvm_mmu_unload); |
3143 | 3470 | ||
3144 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | ||
3145 | struct kvm_mmu_page *sp, | ||
3146 | u64 *spte) | ||
3147 | { | ||
3148 | u64 pte; | ||
3149 | struct kvm_mmu_page *child; | ||
3150 | |||
3151 | pte = *spte; | ||
3152 | if (is_shadow_present_pte(pte)) { | ||
3153 | if (is_last_spte(pte, sp->role.level)) | ||
3154 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); | ||
3155 | else { | ||
3156 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
3157 | mmu_page_remove_parent_pte(child, spte); | ||
3158 | } | ||
3159 | } | ||
3160 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
3161 | if (is_large_pte(pte)) | ||
3162 | --vcpu->kvm->stat.lpages; | ||
3163 | } | ||
3164 | |||
3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3471 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3166 | struct kvm_mmu_page *sp, u64 *spte, | 3472 | struct kvm_mmu_page *sp, u64 *spte, |
3167 | const void *new) | 3473 | const void *new) |
@@ -3233,6 +3539,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3233 | int level, npte, invlpg_counter, r, flooded = 0; | 3539 | int level, npte, invlpg_counter, r, flooded = 0; |
3234 | bool remote_flush, local_flush, zap_page; | 3540 | bool remote_flush, local_flush, zap_page; |
3235 | 3541 | ||
3542 | /* | ||
3543 | * If we don't have indirect shadow pages, it means no page is | ||
3544 | * write-protected, so we can simply return. | ||
3545 | */ | ||
3546 | if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) | ||
3547 | return; | ||
3548 | |||
3236 | zap_page = remote_flush = local_flush = false; | 3549 | zap_page = remote_flush = local_flush = false; |
3237 | offset = offset_in_page(gpa); | 3550 | offset = offset_in_page(gpa); |
3238 | 3551 | ||
@@ -3336,7 +3649,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3336 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 3649 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
3337 | while (npte--) { | 3650 | while (npte--) { |
3338 | entry = *spte; | 3651 | entry = *spte; |
3339 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 3652 | mmu_page_zap_pte(vcpu->kvm, sp, spte); |
3340 | if (gentry && | 3653 | if (gentry && |
3341 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3654 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3342 | & mask.word)) | 3655 | & mask.word)) |
@@ -3380,9 +3693,9 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | |||
3380 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 3693 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
3381 | struct kvm_mmu_page, link); | 3694 | struct kvm_mmu_page, link); |
3382 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | 3695 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
3383 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
3384 | ++vcpu->kvm->stat.mmu_recycled; | 3696 | ++vcpu->kvm->stat.mmu_recycled; |
3385 | } | 3697 | } |
3698 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
3386 | } | 3699 | } |
3387 | 3700 | ||
3388 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, | 3701 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, |
@@ -3506,15 +3819,15 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3506 | continue; | 3819 | continue; |
3507 | 3820 | ||
3508 | if (is_large_pte(pt[i])) { | 3821 | if (is_large_pte(pt[i])) { |
3509 | drop_spte(kvm, &pt[i], | 3822 | drop_spte(kvm, &pt[i]); |
3510 | shadow_trap_nonpresent_pte); | ||
3511 | --kvm->stat.lpages; | 3823 | --kvm->stat.lpages; |
3512 | continue; | 3824 | continue; |
3513 | } | 3825 | } |
3514 | 3826 | ||
3515 | /* avoid RMW */ | 3827 | /* avoid RMW */ |
3516 | if (is_writable_pte(pt[i])) | 3828 | if (is_writable_pte(pt[i])) |
3517 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); | 3829 | mmu_spte_update(&pt[i], |
3830 | pt[i] & ~PT_WRITABLE_MASK); | ||
3518 | } | 3831 | } |
3519 | } | 3832 | } |
3520 | kvm_flush_remote_tlbs(kvm); | 3833 | kvm_flush_remote_tlbs(kvm); |
@@ -3590,25 +3903,18 @@ static struct shrinker mmu_shrinker = { | |||
3590 | 3903 | ||
3591 | static void mmu_destroy_caches(void) | 3904 | static void mmu_destroy_caches(void) |
3592 | { | 3905 | { |
3593 | if (pte_chain_cache) | 3906 | if (pte_list_desc_cache) |
3594 | kmem_cache_destroy(pte_chain_cache); | 3907 | kmem_cache_destroy(pte_list_desc_cache); |
3595 | if (rmap_desc_cache) | ||
3596 | kmem_cache_destroy(rmap_desc_cache); | ||
3597 | if (mmu_page_header_cache) | 3908 | if (mmu_page_header_cache) |
3598 | kmem_cache_destroy(mmu_page_header_cache); | 3909 | kmem_cache_destroy(mmu_page_header_cache); |
3599 | } | 3910 | } |
3600 | 3911 | ||
3601 | int kvm_mmu_module_init(void) | 3912 | int kvm_mmu_module_init(void) |
3602 | { | 3913 | { |
3603 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | 3914 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", |
3604 | sizeof(struct kvm_pte_chain), | 3915 | sizeof(struct pte_list_desc), |
3605 | 0, 0, NULL); | ||
3606 | if (!pte_chain_cache) | ||
3607 | goto nomem; | ||
3608 | rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", | ||
3609 | sizeof(struct kvm_rmap_desc), | ||
3610 | 0, 0, NULL); | 3916 | 0, 0, NULL); |
3611 | if (!rmap_desc_cache) | 3917 | if (!pte_list_desc_cache) |
3612 | goto nomem; | 3918 | goto nomem; |
3613 | 3919 | ||
3614 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", | 3920 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", |
@@ -3775,16 +4081,17 @@ out: | |||
3775 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | 4081 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) |
3776 | { | 4082 | { |
3777 | struct kvm_shadow_walk_iterator iterator; | 4083 | struct kvm_shadow_walk_iterator iterator; |
4084 | u64 spte; | ||
3778 | int nr_sptes = 0; | 4085 | int nr_sptes = 0; |
3779 | 4086 | ||
3780 | spin_lock(&vcpu->kvm->mmu_lock); | 4087 | walk_shadow_page_lockless_begin(vcpu); |
3781 | for_each_shadow_entry(vcpu, addr, iterator) { | 4088 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { |
3782 | sptes[iterator.level-1] = *iterator.sptep; | 4089 | sptes[iterator.level-1] = spte; |
3783 | nr_sptes++; | 4090 | nr_sptes++; |
3784 | if (!is_shadow_present_pte(*iterator.sptep)) | 4091 | if (!is_shadow_present_pte(spte)) |
3785 | break; | 4092 | break; |
3786 | } | 4093 | } |
3787 | spin_unlock(&vcpu->kvm->mmu_lock); | 4094 | walk_shadow_page_lockless_end(vcpu); |
3788 | 4095 | ||
3789 | return nr_sptes; | 4096 | return nr_sptes; |
3790 | } | 4097 | } |
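Side note, not part of the patch: much of the mmu.c churn above replaces the shadow_trap/shadow_notrap pair with MMIO sptes, i.e. not-present entries that are tagged (via kvm_mmu_set_mmio_spte_mask()) and carry the gfn plus access bits, so handle_mmio_page_fault_common() can hand the fault straight to the emulator without re-walking the guest tables. The field layout below is an assumption chosen for the illustration, not the encoding the patch actually uses:

#include <linux/types.h>

#define DEMO_MMIO_TAG           (1ULL << 62)    /* assumed tag bit */
#define DEMO_MMIO_GFN_SHIFT     12              /* assumed gfn field position */
#define DEMO_MMIO_ACC_MASK      0x7ULL          /* assumed access field, bits 0-2 */

static u64 demo_mark_mmio_spte(u64 gfn, unsigned int access)
{
        return DEMO_MMIO_TAG | (gfn << DEMO_MMIO_GFN_SHIFT) |
               (access & DEMO_MMIO_ACC_MASK);
}

static bool demo_is_mmio_spte(u64 spte)
{
        return (spte & DEMO_MMIO_TAG) != 0;
}

static u64 demo_get_mmio_gfn(u64 spte)
{
        return (spte & ~DEMO_MMIO_TAG & ~DEMO_MMIO_ACC_MASK) >> DEMO_MMIO_GFN_SHIFT;
}

static unsigned int demo_get_mmio_access(u64 spte)
{
        return spte & DEMO_MMIO_ACC_MASK;
}

On a fault that arrives with PFERR_RSVD_MASK set, the handler only has to read the spte locklessly, check the tag, and pass gfn/access on; that is the fast path the new PFERR_RSVD_MASK checks in nonpaging_page_fault(), tdp_page_fault() and FNAME(page_fault) select.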
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 7086ca85d3e7..e374db9af021 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -49,6 +49,8 @@ | |||
49 | #define PFERR_FETCH_MASK (1U << 4) | 49 | #define PFERR_FETCH_MASK (1U << 4) |
50 | 50 | ||
51 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 51 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
52 | void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); | ||
53 | int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); | ||
52 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 54 | int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
53 | 55 | ||
54 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 56 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
@@ -76,4 +78,27 @@ static inline int is_present_gpte(unsigned long pte) | |||
76 | return pte & PT_PRESENT_MASK; | 78 | return pte & PT_PRESENT_MASK; |
77 | } | 79 | } |
78 | 80 | ||
81 | static inline int is_writable_pte(unsigned long pte) | ||
82 | { | ||
83 | return pte & PT_WRITABLE_MASK; | ||
84 | } | ||
85 | |||
86 | static inline bool is_write_protection(struct kvm_vcpu *vcpu) | ||
87 | { | ||
88 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | ||
89 | } | ||
90 | |||
91 | static inline bool check_write_user_access(struct kvm_vcpu *vcpu, | ||
92 | bool write_fault, bool user_fault, | ||
93 | unsigned long pte) | ||
94 | { | ||
95 | if (unlikely(write_fault && !is_writable_pte(pte) | ||
96 | && (user_fault || is_write_protection(vcpu)))) | ||
97 | return false; | ||
98 | |||
99 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) | ||
100 | return false; | ||
101 | |||
102 | return true; | ||
103 | } | ||
79 | #endif | 104 | #endif |
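Side note, not part of the patch: the new check_write_user_access() helper centralizes the write/user permission test the walker used to open-code. A hypothetical caller (illustrative only, assumes the mmu.h context above), showing the two cases that matter for the cr0.wp=0 path handled in set_spte():

/* Hypothetical example, not part of the patch. */
static void demo_permission_cases(struct kvm_vcpu *vcpu, unsigned long gpte)
{
        /* gpte is assumed present and user-accessible, but not writable. */
        bool user_write, kernel_write;

        /* 1. Write fault from user mode: always denied. */
        user_write = check_write_user_access(vcpu, true, true, gpte);

        /*
         * 2. Write fault from the kernel with CR0.WP=0: allowed; set_spte()
         *    then clears PT_USER_MASK and, under SMEP, also sets NX.
         */
        kernel_write = check_write_user_access(vcpu, true, false, gpte);

        WARN_ON(user_write);
        WARN_ON(is_write_protection(vcpu) && kernel_write);
}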
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 5f6223b8bcf7..2460a265be23 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -99,18 +99,6 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) | |||
99 | "level = %d\n", sp, level); | 99 | "level = %d\n", sp, level); |
100 | return; | 100 | return; |
101 | } | 101 | } |
102 | |||
103 | if (*sptep == shadow_notrap_nonpresent_pte) { | ||
104 | audit_printk(vcpu->kvm, "notrap spte in unsync " | ||
105 | "sp: %p\n", sp); | ||
106 | return; | ||
107 | } | ||
108 | } | ||
109 | |||
110 | if (sp->role.direct && *sptep == shadow_notrap_nonpresent_pte) { | ||
111 | audit_printk(vcpu->kvm, "notrap spte in direct sp: %p\n", | ||
112 | sp); | ||
113 | return; | ||
114 | } | 102 | } |
115 | 103 | ||
116 | if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level)) | 104 | if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level)) |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index b60b4fdb3eda..eed67f34146d 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -196,6 +196,54 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, | |||
196 | TP_ARGS(sp) | 196 | TP_ARGS(sp) |
197 | ); | 197 | ); |
198 | 198 | ||
199 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages, | ||
200 | TP_PROTO(struct kvm_mmu_page *sp), | ||
201 | |||
202 | TP_ARGS(sp) | ||
203 | ); | ||
204 | |||
205 | TRACE_EVENT( | ||
206 | mark_mmio_spte, | ||
207 | TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), | ||
208 | TP_ARGS(sptep, gfn, access), | ||
209 | |||
210 | TP_STRUCT__entry( | ||
211 | __field(void *, sptep) | ||
212 | __field(gfn_t, gfn) | ||
213 | __field(unsigned, access) | ||
214 | ), | ||
215 | |||
216 | TP_fast_assign( | ||
217 | __entry->sptep = sptep; | ||
218 | __entry->gfn = gfn; | ||
219 | __entry->access = access; | ||
220 | ), | ||
221 | |||
222 | TP_printk("sptep:%p gfn %llx access %x", __entry->sptep, __entry->gfn, | ||
223 | __entry->access) | ||
224 | ); | ||
225 | |||
226 | TRACE_EVENT( | ||
227 | handle_mmio_page_fault, | ||
228 | TP_PROTO(u64 addr, gfn_t gfn, unsigned access), | ||
229 | TP_ARGS(addr, gfn, access), | ||
230 | |||
231 | TP_STRUCT__entry( | ||
232 | __field(u64, addr) | ||
233 | __field(gfn_t, gfn) | ||
234 | __field(unsigned, access) | ||
235 | ), | ||
236 | |||
237 | TP_fast_assign( | ||
238 | __entry->addr = addr; | ||
239 | __entry->gfn = gfn; | ||
240 | __entry->access = access; | ||
241 | ), | ||
242 | |||
243 | TP_printk("addr:%llx gfn %llx access %x", __entry->addr, __entry->gfn, | ||
244 | __entry->access) | ||
245 | ); | ||
246 | |||
199 | TRACE_EVENT( | 247 | TRACE_EVENT( |
200 | kvm_mmu_audit, | 248 | kvm_mmu_audit, |
201 | TP_PROTO(struct kvm_vcpu *vcpu, int audit_point), | 249 | TP_PROTO(struct kvm_vcpu *vcpu, int audit_point), |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9d03ad4dd5ec..507e2b844cfa 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -101,11 +101,15 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
101 | return (ret != orig_pte); | 101 | return (ret != orig_pte); |
102 | } | 102 | } |
103 | 103 | ||
104 | static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) | 104 | static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, |
105 | bool last) | ||
105 | { | 106 | { |
106 | unsigned access; | 107 | unsigned access; |
107 | 108 | ||
108 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; | 109 | access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; |
110 | if (last && !is_dirty_gpte(gpte)) | ||
111 | access &= ~ACC_WRITE_MASK; | ||
112 | |||
109 | #if PTTYPE == 64 | 113 | #if PTTYPE == 64 |
110 | if (vcpu->arch.mmu.nx) | 114 | if (vcpu->arch.mmu.nx) |
111 | access &= ~(gpte >> PT64_NX_SHIFT); | 115 | access &= ~(gpte >> PT64_NX_SHIFT); |
@@ -113,6 +117,24 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) | |||
113 | return access; | 117 | return access; |
114 | } | 118 | } |
115 | 119 | ||
120 | static bool FNAME(is_last_gpte)(struct guest_walker *walker, | ||
121 | struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | ||
122 | pt_element_t gpte) | ||
123 | { | ||
124 | if (walker->level == PT_PAGE_TABLE_LEVEL) | ||
125 | return true; | ||
126 | |||
127 | if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && | ||
128 | (PTTYPE == 64 || is_pse(vcpu))) | ||
129 | return true; | ||
130 | |||
131 | if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && | ||
132 | (mmu->root_level == PT64_ROOT_LEVEL)) | ||
133 | return true; | ||
134 | |||
135 | return false; | ||
136 | } | ||
137 | |||
116 | /* | 138 | /* |
117 | * Fetch a guest pte for a guest virtual address | 139 | * Fetch a guest pte for a guest virtual address |
118 | */ | 140 | */ |
@@ -125,18 +147,17 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, | |||
125 | gfn_t table_gfn; | 147 | gfn_t table_gfn; |
126 | unsigned index, pt_access, uninitialized_var(pte_access); | 148 | unsigned index, pt_access, uninitialized_var(pte_access); |
127 | gpa_t pte_gpa; | 149 | gpa_t pte_gpa; |
128 | bool eperm, present, rsvd_fault; | 150 | bool eperm; |
129 | int offset, write_fault, user_fault, fetch_fault; | 151 | int offset; |
130 | 152 | const int write_fault = access & PFERR_WRITE_MASK; | |
131 | write_fault = access & PFERR_WRITE_MASK; | 153 | const int user_fault = access & PFERR_USER_MASK; |
132 | user_fault = access & PFERR_USER_MASK; | 154 | const int fetch_fault = access & PFERR_FETCH_MASK; |
133 | fetch_fault = access & PFERR_FETCH_MASK; | 155 | u16 errcode = 0; |
134 | 156 | ||
135 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 157 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, |
136 | fetch_fault); | 158 | fetch_fault); |
137 | walk: | 159 | retry_walk: |
138 | present = true; | 160 | eperm = false; |
139 | eperm = rsvd_fault = false; | ||
140 | walker->level = mmu->root_level; | 161 | walker->level = mmu->root_level; |
141 | pte = mmu->get_cr3(vcpu); | 162 | pte = mmu->get_cr3(vcpu); |
142 | 163 | ||
@@ -144,10 +165,8 @@ walk: | |||
144 | if (walker->level == PT32E_ROOT_LEVEL) { | 165 | if (walker->level == PT32E_ROOT_LEVEL) { |
145 | pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); | 166 | pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); |
146 | trace_kvm_mmu_paging_element(pte, walker->level); | 167 | trace_kvm_mmu_paging_element(pte, walker->level); |
147 | if (!is_present_gpte(pte)) { | 168 | if (!is_present_gpte(pte)) |
148 | present = false; | ||
149 | goto error; | 169 | goto error; |
150 | } | ||
151 | --walker->level; | 170 | --walker->level; |
152 | } | 171 | } |
153 | #endif | 172 | #endif |
@@ -170,42 +189,31 @@ walk: | |||
170 | 189 | ||
171 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), | 190 | real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), |
172 | PFERR_USER_MASK|PFERR_WRITE_MASK); | 191 | PFERR_USER_MASK|PFERR_WRITE_MASK); |
173 | if (unlikely(real_gfn == UNMAPPED_GVA)) { | 192 | if (unlikely(real_gfn == UNMAPPED_GVA)) |
174 | present = false; | 193 | goto error; |
175 | break; | ||
176 | } | ||
177 | real_gfn = gpa_to_gfn(real_gfn); | 194 | real_gfn = gpa_to_gfn(real_gfn); |
178 | 195 | ||
179 | host_addr = gfn_to_hva(vcpu->kvm, real_gfn); | 196 | host_addr = gfn_to_hva(vcpu->kvm, real_gfn); |
180 | if (unlikely(kvm_is_error_hva(host_addr))) { | 197 | if (unlikely(kvm_is_error_hva(host_addr))) |
181 | present = false; | 198 | goto error; |
182 | break; | ||
183 | } | ||
184 | 199 | ||
185 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); | 200 | ptep_user = (pt_element_t __user *)((void *)host_addr + offset); |
186 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) { | 201 | if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) |
187 | present = false; | 202 | goto error; |
188 | break; | ||
189 | } | ||
190 | 203 | ||
191 | trace_kvm_mmu_paging_element(pte, walker->level); | 204 | trace_kvm_mmu_paging_element(pte, walker->level); |
192 | 205 | ||
193 | if (unlikely(!is_present_gpte(pte))) { | 206 | if (unlikely(!is_present_gpte(pte))) |
194 | present = false; | 207 | goto error; |
195 | break; | ||
196 | } | ||
197 | 208 | ||
198 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, | 209 | if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte, |
199 | walker->level))) { | 210 | walker->level))) { |
200 | rsvd_fault = true; | 211 | errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; |
201 | break; | 212 | goto error; |
202 | } | 213 | } |
203 | 214 | ||
204 | if (unlikely(write_fault && !is_writable_pte(pte) | 215 | if (!check_write_user_access(vcpu, write_fault, user_fault, |
205 | && (user_fault || is_write_protection(vcpu)))) | 216 | pte)) |
206 | eperm = true; | ||
207 | |||
208 | if (unlikely(user_fault && !(pte & PT_USER_MASK))) | ||
209 | eperm = true; | 217 | eperm = true; |
210 | 218 | ||
211 | #if PTTYPE == 64 | 219 | #if PTTYPE == 64 |
@@ -213,39 +221,35 @@ walk: | |||
213 | eperm = true; | 221 | eperm = true; |
214 | #endif | 222 | #endif |
215 | 223 | ||
216 | if (!eperm && !rsvd_fault | 224 | if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { |
217 | && unlikely(!(pte & PT_ACCESSED_MASK))) { | ||
218 | int ret; | 225 | int ret; |
219 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 226 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
220 | sizeof(pte)); | 227 | sizeof(pte)); |
221 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | 228 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
222 | pte, pte|PT_ACCESSED_MASK); | 229 | pte, pte|PT_ACCESSED_MASK); |
223 | if (unlikely(ret < 0)) { | 230 | if (unlikely(ret < 0)) |
224 | present = false; | 231 | goto error; |
225 | break; | 232 | else if (ret) |
226 | } else if (ret) | 233 | goto retry_walk; |
227 | goto walk; | ||
228 | 234 | ||
229 | mark_page_dirty(vcpu->kvm, table_gfn); | 235 | mark_page_dirty(vcpu->kvm, table_gfn); |
230 | pte |= PT_ACCESSED_MASK; | 236 | pte |= PT_ACCESSED_MASK; |
231 | } | 237 | } |
232 | 238 | ||
233 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte); | ||
234 | |||
235 | walker->ptes[walker->level - 1] = pte; | 239 | walker->ptes[walker->level - 1] = pte; |
236 | 240 | ||
237 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 241 | if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) { |
238 | ((walker->level == PT_DIRECTORY_LEVEL) && | ||
239 | is_large_pte(pte) && | ||
240 | (PTTYPE == 64 || is_pse(vcpu))) || | ||
241 | ((walker->level == PT_PDPE_LEVEL) && | ||
242 | is_large_pte(pte) && | ||
243 | mmu->root_level == PT64_ROOT_LEVEL)) { | ||
244 | int lvl = walker->level; | 242 | int lvl = walker->level; |
245 | gpa_t real_gpa; | 243 | gpa_t real_gpa; |
246 | gfn_t gfn; | 244 | gfn_t gfn; |
247 | u32 ac; | 245 | u32 ac; |
248 | 246 | ||
247 | /* check if the kernel is fetching from a user page */ | ||
248 | if (unlikely(pte_access & PT_USER_MASK) && | ||
249 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) | ||
250 | if (fetch_fault && !user_fault) | ||
251 | eperm = true; | ||
252 | |||
249 | gfn = gpte_to_gfn_lvl(pte, lvl); | 253 | gfn = gpte_to_gfn_lvl(pte, lvl); |
250 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; | 254 | gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; |
251 | 255 | ||
@@ -266,12 +270,14 @@ walk: | |||
266 | break; | 270 | break; |
267 | } | 271 | } |
268 | 272 | ||
269 | pt_access = pte_access; | 273 | pt_access &= FNAME(gpte_access)(vcpu, pte, false); |
270 | --walker->level; | 274 | --walker->level; |
271 | } | 275 | } |
272 | 276 | ||
273 | if (unlikely(!present || eperm || rsvd_fault)) | 277 | if (unlikely(eperm)) { |
278 | errcode |= PFERR_PRESENT_MASK; | ||
274 | goto error; | 279 | goto error; |
280 | } | ||
275 | 281 | ||
276 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { | 282 | if (write_fault && unlikely(!is_dirty_gpte(pte))) { |
277 | int ret; | 283 | int ret; |
@@ -279,17 +285,17 @@ walk: | |||
279 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 285 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
280 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, | 286 | ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, |
281 | pte, pte|PT_DIRTY_MASK); | 287 | pte, pte|PT_DIRTY_MASK); |
282 | if (unlikely(ret < 0)) { | 288 | if (unlikely(ret < 0)) |
283 | present = false; | ||
284 | goto error; | 289 | goto error; |
285 | } else if (ret) | 290 | else if (ret) |
286 | goto walk; | 291 | goto retry_walk; |
287 | 292 | ||
288 | mark_page_dirty(vcpu->kvm, table_gfn); | 293 | mark_page_dirty(vcpu->kvm, table_gfn); |
289 | pte |= PT_DIRTY_MASK; | 294 | pte |= PT_DIRTY_MASK; |
290 | walker->ptes[walker->level - 1] = pte; | 295 | walker->ptes[walker->level - 1] = pte; |
291 | } | 296 | } |
292 | 297 | ||
298 | pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true); | ||
293 | walker->pt_access = pt_access; | 299 | walker->pt_access = pt_access; |
294 | walker->pte_access = pte_access; | 300 | walker->pte_access = pte_access; |
295 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 301 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
@@ -297,19 +303,14 @@ walk: | |||
297 | return 1; | 303 | return 1; |
298 | 304 | ||
299 | error: | 305 | error: |
306 | errcode |= write_fault | user_fault; | ||
307 | if (fetch_fault && (mmu->nx || | ||
308 | kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))) | ||
309 | errcode |= PFERR_FETCH_MASK; | ||
310 | |||
300 | walker->fault.vector = PF_VECTOR; | 311 | walker->fault.vector = PF_VECTOR; |
301 | walker->fault.error_code_valid = true; | 312 | walker->fault.error_code_valid = true; |
302 | walker->fault.error_code = 0; | 313 | walker->fault.error_code = errcode; |
303 | if (present) | ||
304 | walker->fault.error_code |= PFERR_PRESENT_MASK; | ||
305 | |||
306 | walker->fault.error_code |= write_fault | user_fault; | ||
307 | |||
308 | if (fetch_fault && mmu->nx) | ||
309 | walker->fault.error_code |= PFERR_FETCH_MASK; | ||
310 | if (rsvd_fault) | ||
311 | walker->fault.error_code |= PFERR_RSVD_MASK; | ||
312 | |||
313 | walker->fault.address = addr; | 314 | walker->fault.address = addr; |
314 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; | 315 | walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu; |
315 | 316 | ||
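Side note, not part of the patch: the rewritten error path accumulates errcode as the walk proceeds instead of reconstructing it from the old present/eperm/rsvd_fault flags at the end. A worked example of the bit arithmetic, using the standard x86 #PF error-code bits defined in mmu.h:

/*
 * Example: a user-mode write hits a reserved-bit violation in the guest
 * page table.
 *
 *   at the is_rsvd_bits_set() check:
 *     errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;   ->  errcode = 0x09
 *   at the error: label:
 *     errcode |= write_fault | user_fault;               ->  0x09 | 0x02 | 0x04 = 0x0f
 *
 * walker->fault.error_code is therefore P|W|U|RSVD (0x0f), matching what
 * the CPU itself would report for that access.  fetch_fault is zero here,
 * so PFERR_FETCH_MASK is not added.
 */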
@@ -336,16 +337,11 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | |||
336 | struct kvm_mmu_page *sp, u64 *spte, | 337 | struct kvm_mmu_page *sp, u64 *spte, |
337 | pt_element_t gpte) | 338 | pt_element_t gpte) |
338 | { | 339 | { |
339 | u64 nonpresent = shadow_trap_nonpresent_pte; | ||
340 | |||
341 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) | 340 | if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) |
342 | goto no_present; | 341 | goto no_present; |
343 | 342 | ||
344 | if (!is_present_gpte(gpte)) { | 343 | if (!is_present_gpte(gpte)) |
345 | if (!sp->unsync) | ||
346 | nonpresent = shadow_notrap_nonpresent_pte; | ||
347 | goto no_present; | 344 | goto no_present; |
348 | } | ||
349 | 345 | ||
350 | if (!(gpte & PT_ACCESSED_MASK)) | 346 | if (!(gpte & PT_ACCESSED_MASK)) |
351 | goto no_present; | 347 | goto no_present; |
@@ -353,7 +349,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, | |||
353 | return false; | 349 | return false; |
354 | 350 | ||
355 | no_present: | 351 | no_present: |
356 | drop_spte(vcpu->kvm, spte, nonpresent); | 352 | drop_spte(vcpu->kvm, spte); |
357 | return true; | 353 | return true; |
358 | } | 354 | } |
359 | 355 | ||
@@ -369,9 +365,9 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
369 | return; | 365 | return; |
370 | 366 | ||
371 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 367 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
372 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 368 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); |
373 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); | 369 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
374 | if (is_error_pfn(pfn)) { | 370 | if (mmu_invalid_pfn(pfn)) { |
375 | kvm_release_pfn_clean(pfn); | 371 | kvm_release_pfn_clean(pfn); |
376 | return; | 372 | return; |
377 | } | 373 | } |
@@ -381,7 +377,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
381 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 377 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
382 | */ | 378 | */ |
383 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 379 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
384 | is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, | 380 | NULL, PT_PAGE_TABLE_LEVEL, |
385 | gpte_to_gfn(gpte), pfn, true, true); | 381 | gpte_to_gfn(gpte), pfn, true, true); |
386 | } | 382 | } |
387 | 383 | ||
@@ -432,12 +428,11 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
432 | unsigned pte_access; | 428 | unsigned pte_access; |
433 | gfn_t gfn; | 429 | gfn_t gfn; |
434 | pfn_t pfn; | 430 | pfn_t pfn; |
435 | bool dirty; | ||
436 | 431 | ||
437 | if (spte == sptep) | 432 | if (spte == sptep) |
438 | continue; | 433 | continue; |
439 | 434 | ||
440 | if (*spte != shadow_trap_nonpresent_pte) | 435 | if (is_shadow_present_pte(*spte)) |
441 | continue; | 436 | continue; |
442 | 437 | ||
443 | gpte = gptep[i]; | 438 | gpte = gptep[i]; |
@@ -445,18 +440,18 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
445 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) | 440 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) |
446 | continue; | 441 | continue; |
447 | 442 | ||
448 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 443 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, |
444 | true); | ||
449 | gfn = gpte_to_gfn(gpte); | 445 | gfn = gpte_to_gfn(gpte); |
450 | dirty = is_dirty_gpte(gpte); | ||
451 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, | 446 | pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, |
452 | (pte_access & ACC_WRITE_MASK) && dirty); | 447 | pte_access & ACC_WRITE_MASK); |
453 | if (is_error_pfn(pfn)) { | 448 | if (mmu_invalid_pfn(pfn)) { |
454 | kvm_release_pfn_clean(pfn); | 449 | kvm_release_pfn_clean(pfn); |
455 | break; | 450 | break; |
456 | } | 451 | } |
457 | 452 | ||
458 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 453 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
459 | dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn, | 454 | NULL, PT_PAGE_TABLE_LEVEL, gfn, |
460 | pfn, true, true); | 455 | pfn, true, true); |
461 | } | 456 | } |
462 | } | 457 | } |
@@ -467,12 +462,11 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, | |||
467 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 462 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, |
468 | struct guest_walker *gw, | 463 | struct guest_walker *gw, |
469 | int user_fault, int write_fault, int hlevel, | 464 | int user_fault, int write_fault, int hlevel, |
470 | int *ptwrite, pfn_t pfn, bool map_writable, | 465 | int *emulate, pfn_t pfn, bool map_writable, |
471 | bool prefault) | 466 | bool prefault) |
472 | { | 467 | { |
473 | unsigned access = gw->pt_access; | 468 | unsigned access = gw->pt_access; |
474 | struct kvm_mmu_page *sp = NULL; | 469 | struct kvm_mmu_page *sp = NULL; |
475 | bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); | ||
476 | int top_level; | 470 | int top_level; |
477 | unsigned direct_access; | 471 | unsigned direct_access; |
478 | struct kvm_shadow_walk_iterator it; | 472 | struct kvm_shadow_walk_iterator it; |
@@ -480,9 +474,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
480 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | 474 | if (!is_present_gpte(gw->ptes[gw->level - 1])) |
481 | return NULL; | 475 | return NULL; |
482 | 476 | ||
483 | direct_access = gw->pt_access & gw->pte_access; | 477 | direct_access = gw->pte_access; |
484 | if (!dirty) | ||
485 | direct_access &= ~ACC_WRITE_MASK; | ||
486 | 478 | ||
487 | top_level = vcpu->arch.mmu.root_level; | 479 | top_level = vcpu->arch.mmu.root_level; |
488 | if (top_level == PT32E_ROOT_LEVEL) | 480 | if (top_level == PT32E_ROOT_LEVEL) |
@@ -540,8 +532,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
540 | link_shadow_page(it.sptep, sp); | 532 | link_shadow_page(it.sptep, sp); |
541 | } | 533 | } |
542 | 534 | ||
543 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | 535 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, |
544 | user_fault, write_fault, dirty, ptwrite, it.level, | 536 | user_fault, write_fault, emulate, it.level, |
545 | gw->gfn, pfn, prefault, map_writable); | 537 | gw->gfn, pfn, prefault, map_writable); |
546 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); | 538 | FNAME(pte_prefetch)(vcpu, gw, it.sptep); |
547 | 539 | ||
@@ -575,7 +567,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
575 | int user_fault = error_code & PFERR_USER_MASK; | 567 | int user_fault = error_code & PFERR_USER_MASK; |
576 | struct guest_walker walker; | 568 | struct guest_walker walker; |
577 | u64 *sptep; | 569 | u64 *sptep; |
578 | int write_pt = 0; | 570 | int emulate = 0; |
579 | int r; | 571 | int r; |
580 | pfn_t pfn; | 572 | pfn_t pfn; |
581 | int level = PT_PAGE_TABLE_LEVEL; | 573 | int level = PT_PAGE_TABLE_LEVEL; |
@@ -585,6 +577,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
585 | 577 | ||
586 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 578 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
587 | 579 | ||
580 | if (unlikely(error_code & PFERR_RSVD_MASK)) | ||
581 | return handle_mmio_page_fault(vcpu, addr, error_code, | ||
582 | mmu_is_nested(vcpu)); | ||
583 | |||
588 | r = mmu_topup_memory_caches(vcpu); | 584 | r = mmu_topup_memory_caches(vcpu); |
589 | if (r) | 585 | if (r) |
590 | return r; | 586 | return r; |
@@ -623,9 +619,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
623 | &map_writable)) | 619 | &map_writable)) |
624 | return 0; | 620 | return 0; |
625 | 621 | ||
626 | /* mmio */ | 622 | if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, |
627 | if (is_error_pfn(pfn)) | 623 | walker.gfn, pfn, walker.pte_access, &r)) |
628 | return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); | 624 | return r; |
629 | 625 | ||
630 | spin_lock(&vcpu->kvm->mmu_lock); | 626 | spin_lock(&vcpu->kvm->mmu_lock); |
631 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 627 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -636,19 +632,19 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
636 | if (!force_pt_level) | 632 | if (!force_pt_level) |
637 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 633 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
638 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 634 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
639 | level, &write_pt, pfn, map_writable, prefault); | 635 | level, &emulate, pfn, map_writable, prefault); |
640 | (void)sptep; | 636 | (void)sptep; |
641 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 637 | pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__, |
642 | sptep, *sptep, write_pt); | 638 | sptep, *sptep, emulate); |
643 | 639 | ||
644 | if (!write_pt) | 640 | if (!emulate) |
645 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 641 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
646 | 642 | ||
647 | ++vcpu->stat.pf_fixed; | 643 | ++vcpu->stat.pf_fixed; |
648 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); | 644 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); |
649 | spin_unlock(&vcpu->kvm->mmu_lock); | 645 | spin_unlock(&vcpu->kvm->mmu_lock); |
650 | 646 | ||
651 | return write_pt; | 647 | return emulate; |
652 | 648 | ||
653 | out_unlock: | 649 | out_unlock: |
654 | spin_unlock(&vcpu->kvm->mmu_lock); | 650 | spin_unlock(&vcpu->kvm->mmu_lock); |
@@ -665,6 +661,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
665 | u64 *sptep; | 661 | u64 *sptep; |
666 | int need_flush = 0; | 662 | int need_flush = 0; |
667 | 663 | ||
664 | vcpu_clear_mmio_info(vcpu, gva); | ||
665 | |||
668 | spin_lock(&vcpu->kvm->mmu_lock); | 666 | spin_lock(&vcpu->kvm->mmu_lock); |
669 | 667 | ||
670 | for_each_shadow_entry(vcpu, gva, iterator) { | 668 | for_each_shadow_entry(vcpu, gva, iterator) { |
@@ -688,11 +686,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
688 | if (is_shadow_present_pte(*sptep)) { | 686 | if (is_shadow_present_pte(*sptep)) { |
689 | if (is_large_pte(*sptep)) | 687 | if (is_large_pte(*sptep)) |
690 | --vcpu->kvm->stat.lpages; | 688 | --vcpu->kvm->stat.lpages; |
691 | drop_spte(vcpu->kvm, sptep, | 689 | drop_spte(vcpu->kvm, sptep); |
692 | shadow_trap_nonpresent_pte); | ||
693 | need_flush = 1; | 690 | need_flush = 1; |
694 | } else | 691 | } else if (is_mmio_spte(*sptep)) |
695 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 692 | mmu_spte_clear_no_track(sptep); |
693 | |||
696 | break; | 694 | break; |
697 | } | 695 | } |
698 | 696 | ||
@@ -752,36 +750,6 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
752 | return gpa; | 750 | return gpa; |
753 | } | 751 | } |
754 | 752 | ||
755 | static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | ||
756 | struct kvm_mmu_page *sp) | ||
757 | { | ||
758 | int i, j, offset, r; | ||
759 | pt_element_t pt[256 / sizeof(pt_element_t)]; | ||
760 | gpa_t pte_gpa; | ||
761 | |||
762 | if (sp->role.direct | ||
763 | || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { | ||
764 | nonpaging_prefetch_page(vcpu, sp); | ||
765 | return; | ||
766 | } | ||
767 | |||
768 | pte_gpa = gfn_to_gpa(sp->gfn); | ||
769 | if (PTTYPE == 32) { | ||
770 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
771 | pte_gpa += offset * sizeof(pt_element_t); | ||
772 | } | ||
773 | |||
774 | for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) { | ||
775 | r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt); | ||
776 | pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t); | ||
777 | for (j = 0; j < ARRAY_SIZE(pt); ++j) | ||
778 | if (r || is_present_gpte(pt[j])) | ||
779 | sp->spt[i+j] = shadow_trap_nonpresent_pte; | ||
780 | else | ||
781 | sp->spt[i+j] = shadow_notrap_nonpresent_pte; | ||
782 | } | ||
783 | } | ||
784 | |||
785 | /* | 753 | /* |
786 | * Using the cached information from sp->gfns is safe because: | 754 | * Using the cached information from sp->gfns is safe because: |
787 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 755 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
@@ -817,7 +785,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
817 | gpa_t pte_gpa; | 785 | gpa_t pte_gpa; |
818 | gfn_t gfn; | 786 | gfn_t gfn; |
819 | 787 | ||
820 | if (!is_shadow_present_pte(sp->spt[i])) | 788 | if (!sp->spt[i]) |
821 | continue; | 789 | continue; |
822 | 790 | ||
823 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); | 791 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
@@ -826,26 +794,30 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
826 | sizeof(pt_element_t))) | 794 | sizeof(pt_element_t))) |
827 | return -EINVAL; | 795 | return -EINVAL; |
828 | 796 | ||
829 | gfn = gpte_to_gfn(gpte); | ||
830 | |||
831 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { | 797 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
832 | vcpu->kvm->tlbs_dirty++; | 798 | vcpu->kvm->tlbs_dirty++; |
833 | continue; | 799 | continue; |
834 | } | 800 | } |
835 | 801 | ||
802 | gfn = gpte_to_gfn(gpte); | ||
803 | pte_access = sp->role.access; | ||
804 | pte_access &= FNAME(gpte_access)(vcpu, gpte, true); | ||
805 | |||
806 | if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) | ||
807 | continue; | ||
808 | |||
836 | if (gfn != sp->gfns[i]) { | 809 | if (gfn != sp->gfns[i]) { |
837 | drop_spte(vcpu->kvm, &sp->spt[i], | 810 | drop_spte(vcpu->kvm, &sp->spt[i]); |
838 | shadow_trap_nonpresent_pte); | ||
839 | vcpu->kvm->tlbs_dirty++; | 811 | vcpu->kvm->tlbs_dirty++; |
840 | continue; | 812 | continue; |
841 | } | 813 | } |
842 | 814 | ||
843 | nr_present++; | 815 | nr_present++; |
844 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 816 | |
845 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; | 817 | host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; |
846 | 818 | ||
847 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 819 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
848 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 820 | PT_PAGE_TABLE_LEVEL, gfn, |
849 | spte_to_pfn(sp->spt[i]), true, false, | 821 | spte_to_pfn(sp->spt[i]), true, false, |
850 | host_writable); | 822 | host_writable); |
851 | } | 823 | } |
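Note: the paging_tmpl.h hunks above replace is_error_pfn()/shadow_trap_nonpresent_pte handling with mmu_invalid_pfn(), is_mmio_spte(), sync_mmio_spte() and mmu_spte_clear_no_track(), which belong to the MMIO page-fault path introduced elsewhere in this series. As a rough standalone illustration of the underlying idea only (the real KVM encoding differs, and every DEMO_* value here is invented): a non-present shadow entry is tagged with a recognizable pattern so a later fault on it can be routed to MMIO emulation without re-walking the guest page table.

#include <stdint.h>
#include <stdio.h>

#define DEMO_MMIO_MASK          0x3ull  /* invented marker bits, not KVM's mask */
#define DEMO_ACCESS_SHIFT       2
#define DEMO_GFN_SHIFT          12

static uint64_t demo_make_mmio_spte(uint64_t gfn, unsigned int access)
{
        return DEMO_MMIO_MASK | ((uint64_t)access << DEMO_ACCESS_SHIFT) |
               (gfn << DEMO_GFN_SHIFT);
}

static int demo_is_mmio_spte(uint64_t spte)
{
        return (spte & DEMO_MMIO_MASK) == DEMO_MMIO_MASK;
}

int main(void)
{
        uint64_t spte = demo_make_mmio_spte(0x1234, 0x3);

        printf("mmio? %d, cached gfn %#llx\n", demo_is_mmio_spte(spte),
               (unsigned long long)(spte >> DEMO_GFN_SHIFT));
        return 0;
}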
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 506e4fe23adc..475d1c948501 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1496,11 +1496,14 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1496 | update_cr0_intercept(svm); | 1496 | update_cr0_intercept(svm); |
1497 | } | 1497 | } |
1498 | 1498 | ||
1499 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1499 | static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
1500 | { | 1500 | { |
1501 | unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; | 1501 | unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; |
1502 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; | 1502 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; |
1503 | 1503 | ||
1504 | if (cr4 & X86_CR4_VMXE) | ||
1505 | return 1; | ||
1506 | |||
1504 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) | 1507 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) |
1505 | svm_flush_tlb(vcpu); | 1508 | svm_flush_tlb(vcpu); |
1506 | 1509 | ||
@@ -1510,6 +1513,7 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1510 | cr4 |= host_cr4_mce; | 1513 | cr4 |= host_cr4_mce; |
1511 | to_svm(vcpu)->vmcb->save.cr4 = cr4; | 1514 | to_svm(vcpu)->vmcb->save.cr4 = cr4; |
1512 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); | 1515 | mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); |
1516 | return 0; | ||
1513 | } | 1517 | } |
1514 | 1518 | ||
1515 | static void svm_set_segment(struct kvm_vcpu *vcpu, | 1519 | static void svm_set_segment(struct kvm_vcpu *vcpu, |
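Note: svm_set_cr4() above now returns int and rejects any attempt by the guest to set CR4.VMXE, since VMX is not available on an SVM (AMD) host; the non-zero return is presumably turned into a #GP by the common x86 CR4 path. A minimal standalone sketch of the check:

#include <stdio.h>

#define X86_CR4_VMXE (1UL << 13)        /* CR4 bit 13: enable VMX operation */

/* returns non-zero to reject the write, mirroring the svm_set_cr4() change */
static int demo_svm_check_cr4(unsigned long cr4)
{
        if (cr4 & X86_CR4_VMXE)
                return 1;       /* no VMX on an SVM host */
        return 0;
}

int main(void)
{
        printf("without VMXE: %d, with VMXE: %d\n",
               demo_svm_check_cr4(0x20),
               demo_svm_check_cr4(0x20 | X86_CR4_VMXE));
        return 0;
}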
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index db932760ea82..3ff898c104f7 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -675,12 +675,12 @@ TRACE_EVENT(kvm_emulate_insn, | |||
675 | ), | 675 | ), |
676 | 676 | ||
677 | TP_fast_assign( | 677 | TP_fast_assign( |
678 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | 678 | __entry->rip = vcpu->arch.emulate_ctxt.fetch.start; |
679 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | 679 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); |
680 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | 680 | __entry->len = vcpu->arch.emulate_ctxt._eip |
681 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | 681 | - vcpu->arch.emulate_ctxt.fetch.start; |
682 | memcpy(__entry->insn, | 682 | memcpy(__entry->insn, |
683 | vcpu->arch.emulate_ctxt.decode.fetch.data, | 683 | vcpu->arch.emulate_ctxt.fetch.data, |
684 | 15); | 684 | 15); |
685 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | 685 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); |
686 | __entry->failed = failed; | 686 | __entry->failed = failed; |
@@ -698,6 +698,29 @@ TRACE_EVENT(kvm_emulate_insn, | |||
698 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | 698 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) |
699 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | 699 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) |
700 | 700 | ||
701 | TRACE_EVENT( | ||
702 | vcpu_match_mmio, | ||
703 | TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match), | ||
704 | TP_ARGS(gva, gpa, write, gpa_match), | ||
705 | |||
706 | TP_STRUCT__entry( | ||
707 | __field(gva_t, gva) | ||
708 | __field(gpa_t, gpa) | ||
709 | __field(bool, write) | ||
710 | __field(bool, gpa_match) | ||
711 | ), | ||
712 | |||
713 | TP_fast_assign( | ||
714 | __entry->gva = gva; | ||
715 | __entry->gpa = gpa; | ||
716 | __entry->write = write; | ||
717 | __entry->gpa_match = gpa_match | ||
718 | ), | ||
719 | |||
720 | TP_printk("gva %#lx gpa %#llx %s %s", __entry->gva, __entry->gpa, | ||
721 | __entry->write ? "Write" : "Read", | ||
722 | __entry->gpa_match ? "GPA" : "GVA") | ||
723 | ); | ||
701 | #endif /* _TRACE_KVM_H */ | 724 | #endif /* _TRACE_KVM_H */ |
702 | 725 | ||
703 | #undef TRACE_INCLUDE_PATH | 726 | #undef TRACE_INCLUDE_PATH |
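Note: the new vcpu_match_mmio tracepoint above records which address (guest-virtual or guest-physical) a cached MMIO lookup matched on and whether it was a read or a write. Its TP_printk format corresponds to output like this plain printf stand-in produces (the tracing machinery itself is not reproduced here):

#include <stdio.h>

static void demo_show_match_mmio(unsigned long gva, unsigned long long gpa,
                                 int write, int gpa_match)
{
        /* same format string as the tracepoint's TP_printk */
        printf("gva %#lx gpa %#llx %s %s\n", gva, gpa,
               write ? "Write" : "Read",
               gpa_match ? "GPA" : "GVA");
}

int main(void)
{
        demo_show_match_mmio(0xffff880000001000UL, 0xfee00000ULL, 1, 0);
        return 0;
}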
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d48ec60ea421..e65a158dee64 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -43,13 +43,12 @@ | |||
43 | #include "trace.h" | 43 | #include "trace.h" |
44 | 44 | ||
45 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 45 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
46 | #define __ex_clear(x, reg) \ | ||
47 | ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg) | ||
46 | 48 | ||
47 | MODULE_AUTHOR("Qumranet"); | 49 | MODULE_AUTHOR("Qumranet"); |
48 | MODULE_LICENSE("GPL"); | 50 | MODULE_LICENSE("GPL"); |
49 | 51 | ||
50 | static int __read_mostly bypass_guest_pf = 1; | ||
51 | module_param(bypass_guest_pf, bool, S_IRUGO); | ||
52 | |||
53 | static int __read_mostly enable_vpid = 1; | 52 | static int __read_mostly enable_vpid = 1; |
54 | module_param_named(vpid, enable_vpid, bool, 0444); | 53 | module_param_named(vpid, enable_vpid, bool, 0444); |
55 | 54 | ||
@@ -72,6 +71,14 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
72 | static int __read_mostly yield_on_hlt = 1; | 71 | static int __read_mostly yield_on_hlt = 1; |
73 | module_param(yield_on_hlt, bool, S_IRUGO); | 72 | module_param(yield_on_hlt, bool, S_IRUGO); |
74 | 73 | ||
74 | /* | ||
75 | * If nested=1, nested virtualization is supported, i.e., guests may use | ||
76 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | ||
77 | * use VMX instructions. | ||
78 | */ | ||
79 | static int __read_mostly nested = 0; | ||
80 | module_param(nested, bool, S_IRUGO); | ||
81 | |||
75 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 82 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
76 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 83 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
77 | #define KVM_GUEST_CR0_MASK \ | 84 | #define KVM_GUEST_CR0_MASK \ |
@@ -109,6 +116,7 @@ static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | |||
109 | module_param(ple_window, int, S_IRUGO); | 116 | module_param(ple_window, int, S_IRUGO); |
110 | 117 | ||
111 | #define NR_AUTOLOAD_MSRS 1 | 118 | #define NR_AUTOLOAD_MSRS 1 |
119 | #define VMCS02_POOL_SIZE 1 | ||
112 | 120 | ||
113 | struct vmcs { | 121 | struct vmcs { |
114 | u32 revision_id; | 122 | u32 revision_id; |
@@ -116,17 +124,237 @@ struct vmcs { | |||
116 | char data[0]; | 124 | char data[0]; |
117 | }; | 125 | }; |
118 | 126 | ||
127 | /* | ||
128 | * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also | ||
129 | * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs | ||
130 | * loaded on this CPU (so we can clear them if the CPU goes down). | ||
131 | */ | ||
132 | struct loaded_vmcs { | ||
133 | struct vmcs *vmcs; | ||
134 | int cpu; | ||
135 | int launched; | ||
136 | struct list_head loaded_vmcss_on_cpu_link; | ||
137 | }; | ||
138 | |||
119 | struct shared_msr_entry { | 139 | struct shared_msr_entry { |
120 | unsigned index; | 140 | unsigned index; |
121 | u64 data; | 141 | u64 data; |
122 | u64 mask; | 142 | u64 mask; |
123 | }; | 143 | }; |
124 | 144 | ||
145 | /* | ||
146 | * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a | ||
147 | * single nested guest (L2), hence the name vmcs12. Any VMX implementation has | ||
148 | * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is | ||
149 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, | ||
150 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. | ||
151 | * More than one of these structures may exist, if L1 runs multiple L2 guests. | ||
152 | * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the | ||
153 | * underlying hardware which will be used to run L2. | ||
154 | * This structure is packed to ensure that its layout is identical across | ||
155 | * machines (necessary for live migration). | ||
156 | * If there are changes in this struct, VMCS12_REVISION must be changed. | ||
157 | */ | ||
158 | typedef u64 natural_width; | ||
159 | struct __packed vmcs12 { | ||
160 | /* According to the Intel spec, a VMCS region must start with the | ||
161 | * following two fields. Then follow implementation-specific data. | ||
162 | */ | ||
163 | u32 revision_id; | ||
164 | u32 abort; | ||
165 | |||
166 | u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */ | ||
167 | u32 padding[7]; /* room for future expansion */ | ||
168 | |||
169 | u64 io_bitmap_a; | ||
170 | u64 io_bitmap_b; | ||
171 | u64 msr_bitmap; | ||
172 | u64 vm_exit_msr_store_addr; | ||
173 | u64 vm_exit_msr_load_addr; | ||
174 | u64 vm_entry_msr_load_addr; | ||
175 | u64 tsc_offset; | ||
176 | u64 virtual_apic_page_addr; | ||
177 | u64 apic_access_addr; | ||
178 | u64 ept_pointer; | ||
179 | u64 guest_physical_address; | ||
180 | u64 vmcs_link_pointer; | ||
181 | u64 guest_ia32_debugctl; | ||
182 | u64 guest_ia32_pat; | ||
183 | u64 guest_ia32_efer; | ||
184 | u64 guest_ia32_perf_global_ctrl; | ||
185 | u64 guest_pdptr0; | ||
186 | u64 guest_pdptr1; | ||
187 | u64 guest_pdptr2; | ||
188 | u64 guest_pdptr3; | ||
189 | u64 host_ia32_pat; | ||
190 | u64 host_ia32_efer; | ||
191 | u64 host_ia32_perf_global_ctrl; | ||
192 | u64 padding64[8]; /* room for future expansion */ | ||
193 | /* | ||
194 | * To allow migration of L1 (complete with its L2 guests) between | ||
195 | * machines of different natural widths (32 or 64 bit), we cannot have | ||
196 | * unsigned long fields with no explict size. We use u64 (aliased | ||
197 | * natural_width) instead. Luckily, x86 is little-endian. | ||
198 | */ | ||
199 | natural_width cr0_guest_host_mask; | ||
200 | natural_width cr4_guest_host_mask; | ||
201 | natural_width cr0_read_shadow; | ||
202 | natural_width cr4_read_shadow; | ||
203 | natural_width cr3_target_value0; | ||
204 | natural_width cr3_target_value1; | ||
205 | natural_width cr3_target_value2; | ||
206 | natural_width cr3_target_value3; | ||
207 | natural_width exit_qualification; | ||
208 | natural_width guest_linear_address; | ||
209 | natural_width guest_cr0; | ||
210 | natural_width guest_cr3; | ||
211 | natural_width guest_cr4; | ||
212 | natural_width guest_es_base; | ||
213 | natural_width guest_cs_base; | ||
214 | natural_width guest_ss_base; | ||
215 | natural_width guest_ds_base; | ||
216 | natural_width guest_fs_base; | ||
217 | natural_width guest_gs_base; | ||
218 | natural_width guest_ldtr_base; | ||
219 | natural_width guest_tr_base; | ||
220 | natural_width guest_gdtr_base; | ||
221 | natural_width guest_idtr_base; | ||
222 | natural_width guest_dr7; | ||
223 | natural_width guest_rsp; | ||
224 | natural_width guest_rip; | ||
225 | natural_width guest_rflags; | ||
226 | natural_width guest_pending_dbg_exceptions; | ||
227 | natural_width guest_sysenter_esp; | ||
228 | natural_width guest_sysenter_eip; | ||
229 | natural_width host_cr0; | ||
230 | natural_width host_cr3; | ||
231 | natural_width host_cr4; | ||
232 | natural_width host_fs_base; | ||
233 | natural_width host_gs_base; | ||
234 | natural_width host_tr_base; | ||
235 | natural_width host_gdtr_base; | ||
236 | natural_width host_idtr_base; | ||
237 | natural_width host_ia32_sysenter_esp; | ||
238 | natural_width host_ia32_sysenter_eip; | ||
239 | natural_width host_rsp; | ||
240 | natural_width host_rip; | ||
241 | natural_width paddingl[8]; /* room for future expansion */ | ||
242 | u32 pin_based_vm_exec_control; | ||
243 | u32 cpu_based_vm_exec_control; | ||
244 | u32 exception_bitmap; | ||
245 | u32 page_fault_error_code_mask; | ||
246 | u32 page_fault_error_code_match; | ||
247 | u32 cr3_target_count; | ||
248 | u32 vm_exit_controls; | ||
249 | u32 vm_exit_msr_store_count; | ||
250 | u32 vm_exit_msr_load_count; | ||
251 | u32 vm_entry_controls; | ||
252 | u32 vm_entry_msr_load_count; | ||
253 | u32 vm_entry_intr_info_field; | ||
254 | u32 vm_entry_exception_error_code; | ||
255 | u32 vm_entry_instruction_len; | ||
256 | u32 tpr_threshold; | ||
257 | u32 secondary_vm_exec_control; | ||
258 | u32 vm_instruction_error; | ||
259 | u32 vm_exit_reason; | ||
260 | u32 vm_exit_intr_info; | ||
261 | u32 vm_exit_intr_error_code; | ||
262 | u32 idt_vectoring_info_field; | ||
263 | u32 idt_vectoring_error_code; | ||
264 | u32 vm_exit_instruction_len; | ||
265 | u32 vmx_instruction_info; | ||
266 | u32 guest_es_limit; | ||
267 | u32 guest_cs_limit; | ||
268 | u32 guest_ss_limit; | ||
269 | u32 guest_ds_limit; | ||
270 | u32 guest_fs_limit; | ||
271 | u32 guest_gs_limit; | ||
272 | u32 guest_ldtr_limit; | ||
273 | u32 guest_tr_limit; | ||
274 | u32 guest_gdtr_limit; | ||
275 | u32 guest_idtr_limit; | ||
276 | u32 guest_es_ar_bytes; | ||
277 | u32 guest_cs_ar_bytes; | ||
278 | u32 guest_ss_ar_bytes; | ||
279 | u32 guest_ds_ar_bytes; | ||
280 | u32 guest_fs_ar_bytes; | ||
281 | u32 guest_gs_ar_bytes; | ||
282 | u32 guest_ldtr_ar_bytes; | ||
283 | u32 guest_tr_ar_bytes; | ||
284 | u32 guest_interruptibility_info; | ||
285 | u32 guest_activity_state; | ||
286 | u32 guest_sysenter_cs; | ||
287 | u32 host_ia32_sysenter_cs; | ||
288 | u32 padding32[8]; /* room for future expansion */ | ||
289 | u16 virtual_processor_id; | ||
290 | u16 guest_es_selector; | ||
291 | u16 guest_cs_selector; | ||
292 | u16 guest_ss_selector; | ||
293 | u16 guest_ds_selector; | ||
294 | u16 guest_fs_selector; | ||
295 | u16 guest_gs_selector; | ||
296 | u16 guest_ldtr_selector; | ||
297 | u16 guest_tr_selector; | ||
298 | u16 host_es_selector; | ||
299 | u16 host_cs_selector; | ||
300 | u16 host_ss_selector; | ||
301 | u16 host_ds_selector; | ||
302 | u16 host_fs_selector; | ||
303 | u16 host_gs_selector; | ||
304 | u16 host_tr_selector; | ||
305 | }; | ||
306 | |||
307 | /* | ||
308 | * VMCS12_REVISION is an arbitrary id that should be changed if the content or | ||
309 | * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and | ||
310 | * VMPTRLD verifies that the VMCS region that L1 is loading contains this id. | ||
311 | */ | ||
312 | #define VMCS12_REVISION 0x11e57ed0 | ||
313 | |||
314 | /* | ||
315 | * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region | ||
316 | * and any VMCS region. Although only sizeof(struct vmcs12) are used by the | ||
317 | * current implementation, 4K are reserved to avoid future complications. | ||
318 | */ | ||
319 | #define VMCS12_SIZE 0x1000 | ||
320 | |||
321 | /* Used to remember the last vmcs02 used for some recently used vmcs12s */ | ||
322 | struct vmcs02_list { | ||
323 | struct list_head list; | ||
324 | gpa_t vmptr; | ||
325 | struct loaded_vmcs vmcs02; | ||
326 | }; | ||
327 | |||
328 | /* | ||
329 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need | ||
330 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. | ||
331 | */ | ||
332 | struct nested_vmx { | ||
333 | /* Has the level1 guest done vmxon? */ | ||
334 | bool vmxon; | ||
335 | |||
336 | /* The guest-physical address of the current VMCS L1 keeps for L2 */ | ||
337 | gpa_t current_vmptr; | ||
338 | /* The host-usable pointer to the above */ | ||
339 | struct page *current_vmcs12_page; | ||
340 | struct vmcs12 *current_vmcs12; | ||
341 | |||
342 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | ||
343 | struct list_head vmcs02_pool; | ||
344 | int vmcs02_num; | ||
345 | u64 vmcs01_tsc_offset; | ||
346 | /* L2 must run next, and mustn't decide to exit to L1. */ | ||
347 | bool nested_run_pending; | ||
348 | /* | ||
349 | * Guest pages referred to in vmcs02 with host-physical pointers, so | ||
350 | * we must keep them pinned while L2 runs. | ||
351 | */ | ||
352 | struct page *apic_access_page; | ||
353 | }; | ||
354 | |||
125 | struct vcpu_vmx { | 355 | struct vcpu_vmx { |
126 | struct kvm_vcpu vcpu; | 356 | struct kvm_vcpu vcpu; |
127 | struct list_head local_vcpus_link; | ||
128 | unsigned long host_rsp; | 357 | unsigned long host_rsp; |
129 | int launched; | ||
130 | u8 fail; | 358 | u8 fail; |
131 | u8 cpl; | 359 | u8 cpl; |
132 | bool nmi_known_unmasked; | 360 | bool nmi_known_unmasked; |
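Note: struct vmcs12 introduced above deliberately uses only fixed-width types (u16/u32/u64, with natural_width aliased to u64) and is packed, so its in-memory layout, and therefore a migrated L1's view of it, does not depend on whether the host kernel is 32- or 64-bit. A compile-time check of that property on a cut-down example (field names and offsets here are illustrative, not the real vmcs12 layout):

#include <stddef.h>
#include <stdint.h>

typedef uint64_t natural_width;

struct __attribute__((packed)) mini_vmcs12 {
        uint32_t revision_id;
        uint32_t abort;
        uint32_t launch_state;
        natural_width guest_cr0;
};

/* 12 on both 32-bit and 64-bit builds; an unpacked unsigned long field
 * would land at different offsets on the two word sizes */
_Static_assert(offsetof(struct mini_vmcs12, guest_cr0) == 12,
               "layout must not depend on the host word size");

int main(void)
{
        return 0;
}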
@@ -140,7 +368,14 @@ struct vcpu_vmx { | |||
140 | u64 msr_host_kernel_gs_base; | 368 | u64 msr_host_kernel_gs_base; |
141 | u64 msr_guest_kernel_gs_base; | 369 | u64 msr_guest_kernel_gs_base; |
142 | #endif | 370 | #endif |
143 | struct vmcs *vmcs; | 371 | /* |
372 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | ||
373 | * non-nested (L1) guest, it always points to vmcs01. For a nested | ||
374 | * guest (L2), it points to a different VMCS. | ||
375 | */ | ||
376 | struct loaded_vmcs vmcs01; | ||
377 | struct loaded_vmcs *loaded_vmcs; | ||
378 | bool __launched; /* temporary, used in vmx_vcpu_run */ | ||
144 | struct msr_autoload { | 379 | struct msr_autoload { |
145 | unsigned nr; | 380 | unsigned nr; |
146 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | 381 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; |
@@ -176,6 +411,9 @@ struct vcpu_vmx { | |||
176 | u32 exit_reason; | 411 | u32 exit_reason; |
177 | 412 | ||
178 | bool rdtscp_enabled; | 413 | bool rdtscp_enabled; |
414 | |||
415 | /* Support for a guest hypervisor (nested VMX) */ | ||
416 | struct nested_vmx nested; | ||
179 | }; | 417 | }; |
180 | 418 | ||
181 | enum segment_cache_field { | 419 | enum segment_cache_field { |
@@ -192,6 +430,174 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
192 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 430 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
193 | } | 431 | } |
194 | 432 | ||
433 | #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) | ||
434 | #define FIELD(number, name) [number] = VMCS12_OFFSET(name) | ||
435 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | ||
436 | [number##_HIGH] = VMCS12_OFFSET(name)+4 | ||
437 | |||
438 | static unsigned short vmcs_field_to_offset_table[] = { | ||
439 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | ||
440 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | ||
441 | FIELD(GUEST_CS_SELECTOR, guest_cs_selector), | ||
442 | FIELD(GUEST_SS_SELECTOR, guest_ss_selector), | ||
443 | FIELD(GUEST_DS_SELECTOR, guest_ds_selector), | ||
444 | FIELD(GUEST_FS_SELECTOR, guest_fs_selector), | ||
445 | FIELD(GUEST_GS_SELECTOR, guest_gs_selector), | ||
446 | FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), | ||
447 | FIELD(GUEST_TR_SELECTOR, guest_tr_selector), | ||
448 | FIELD(HOST_ES_SELECTOR, host_es_selector), | ||
449 | FIELD(HOST_CS_SELECTOR, host_cs_selector), | ||
450 | FIELD(HOST_SS_SELECTOR, host_ss_selector), | ||
451 | FIELD(HOST_DS_SELECTOR, host_ds_selector), | ||
452 | FIELD(HOST_FS_SELECTOR, host_fs_selector), | ||
453 | FIELD(HOST_GS_SELECTOR, host_gs_selector), | ||
454 | FIELD(HOST_TR_SELECTOR, host_tr_selector), | ||
455 | FIELD64(IO_BITMAP_A, io_bitmap_a), | ||
456 | FIELD64(IO_BITMAP_B, io_bitmap_b), | ||
457 | FIELD64(MSR_BITMAP, msr_bitmap), | ||
458 | FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr), | ||
459 | FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr), | ||
460 | FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr), | ||
461 | FIELD64(TSC_OFFSET, tsc_offset), | ||
462 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), | ||
463 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), | ||
464 | FIELD64(EPT_POINTER, ept_pointer), | ||
465 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), | ||
466 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), | ||
467 | FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), | ||
468 | FIELD64(GUEST_IA32_PAT, guest_ia32_pat), | ||
469 | FIELD64(GUEST_IA32_EFER, guest_ia32_efer), | ||
470 | FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl), | ||
471 | FIELD64(GUEST_PDPTR0, guest_pdptr0), | ||
472 | FIELD64(GUEST_PDPTR1, guest_pdptr1), | ||
473 | FIELD64(GUEST_PDPTR2, guest_pdptr2), | ||
474 | FIELD64(GUEST_PDPTR3, guest_pdptr3), | ||
475 | FIELD64(HOST_IA32_PAT, host_ia32_pat), | ||
476 | FIELD64(HOST_IA32_EFER, host_ia32_efer), | ||
477 | FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), | ||
478 | FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control), | ||
479 | FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control), | ||
480 | FIELD(EXCEPTION_BITMAP, exception_bitmap), | ||
481 | FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask), | ||
482 | FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match), | ||
483 | FIELD(CR3_TARGET_COUNT, cr3_target_count), | ||
484 | FIELD(VM_EXIT_CONTROLS, vm_exit_controls), | ||
485 | FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count), | ||
486 | FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count), | ||
487 | FIELD(VM_ENTRY_CONTROLS, vm_entry_controls), | ||
488 | FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count), | ||
489 | FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field), | ||
490 | FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code), | ||
491 | FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len), | ||
492 | FIELD(TPR_THRESHOLD, tpr_threshold), | ||
493 | FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control), | ||
494 | FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error), | ||
495 | FIELD(VM_EXIT_REASON, vm_exit_reason), | ||
496 | FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info), | ||
497 | FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code), | ||
498 | FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field), | ||
499 | FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code), | ||
500 | FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len), | ||
501 | FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info), | ||
502 | FIELD(GUEST_ES_LIMIT, guest_es_limit), | ||
503 | FIELD(GUEST_CS_LIMIT, guest_cs_limit), | ||
504 | FIELD(GUEST_SS_LIMIT, guest_ss_limit), | ||
505 | FIELD(GUEST_DS_LIMIT, guest_ds_limit), | ||
506 | FIELD(GUEST_FS_LIMIT, guest_fs_limit), | ||
507 | FIELD(GUEST_GS_LIMIT, guest_gs_limit), | ||
508 | FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit), | ||
509 | FIELD(GUEST_TR_LIMIT, guest_tr_limit), | ||
510 | FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit), | ||
511 | FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit), | ||
512 | FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes), | ||
513 | FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes), | ||
514 | FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes), | ||
515 | FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes), | ||
516 | FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes), | ||
517 | FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes), | ||
518 | FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes), | ||
519 | FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes), | ||
520 | FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info), | ||
521 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), | ||
522 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), | ||
523 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), | ||
524 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), | ||
525 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), | ||
526 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), | ||
527 | FIELD(CR4_READ_SHADOW, cr4_read_shadow), | ||
528 | FIELD(CR3_TARGET_VALUE0, cr3_target_value0), | ||
529 | FIELD(CR3_TARGET_VALUE1, cr3_target_value1), | ||
530 | FIELD(CR3_TARGET_VALUE2, cr3_target_value2), | ||
531 | FIELD(CR3_TARGET_VALUE3, cr3_target_value3), | ||
532 | FIELD(EXIT_QUALIFICATION, exit_qualification), | ||
533 | FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address), | ||
534 | FIELD(GUEST_CR0, guest_cr0), | ||
535 | FIELD(GUEST_CR3, guest_cr3), | ||
536 | FIELD(GUEST_CR4, guest_cr4), | ||
537 | FIELD(GUEST_ES_BASE, guest_es_base), | ||
538 | FIELD(GUEST_CS_BASE, guest_cs_base), | ||
539 | FIELD(GUEST_SS_BASE, guest_ss_base), | ||
540 | FIELD(GUEST_DS_BASE, guest_ds_base), | ||
541 | FIELD(GUEST_FS_BASE, guest_fs_base), | ||
542 | FIELD(GUEST_GS_BASE, guest_gs_base), | ||
543 | FIELD(GUEST_LDTR_BASE, guest_ldtr_base), | ||
544 | FIELD(GUEST_TR_BASE, guest_tr_base), | ||
545 | FIELD(GUEST_GDTR_BASE, guest_gdtr_base), | ||
546 | FIELD(GUEST_IDTR_BASE, guest_idtr_base), | ||
547 | FIELD(GUEST_DR7, guest_dr7), | ||
548 | FIELD(GUEST_RSP, guest_rsp), | ||
549 | FIELD(GUEST_RIP, guest_rip), | ||
550 | FIELD(GUEST_RFLAGS, guest_rflags), | ||
551 | FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions), | ||
552 | FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp), | ||
553 | FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip), | ||
554 | FIELD(HOST_CR0, host_cr0), | ||
555 | FIELD(HOST_CR3, host_cr3), | ||
556 | FIELD(HOST_CR4, host_cr4), | ||
557 | FIELD(HOST_FS_BASE, host_fs_base), | ||
558 | FIELD(HOST_GS_BASE, host_gs_base), | ||
559 | FIELD(HOST_TR_BASE, host_tr_base), | ||
560 | FIELD(HOST_GDTR_BASE, host_gdtr_base), | ||
561 | FIELD(HOST_IDTR_BASE, host_idtr_base), | ||
562 | FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp), | ||
563 | FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip), | ||
564 | FIELD(HOST_RSP, host_rsp), | ||
565 | FIELD(HOST_RIP, host_rip), | ||
566 | }; | ||
567 | static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table); | ||
568 | |||
569 | static inline short vmcs_field_to_offset(unsigned long field) | ||
570 | { | ||
571 | if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0) | ||
572 | return -1; | ||
573 | return vmcs_field_to_offset_table[field]; | ||
574 | } | ||
575 | |||
576 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | ||
577 | { | ||
578 | return to_vmx(vcpu)->nested.current_vmcs12; | ||
579 | } | ||
580 | |||
581 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) | ||
582 | { | ||
583 | struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); | ||
584 | if (is_error_page(page)) { | ||
585 | kvm_release_page_clean(page); | ||
586 | return NULL; | ||
587 | } | ||
588 | return page; | ||
589 | } | ||
590 | |||
591 | static void nested_release_page(struct page *page) | ||
592 | { | ||
593 | kvm_release_page_dirty(page); | ||
594 | } | ||
595 | |||
596 | static void nested_release_page_clean(struct page *page) | ||
597 | { | ||
598 | kvm_release_page_clean(page); | ||
599 | } | ||
600 | |||
195 | static u64 construct_eptp(unsigned long root_hpa); | 601 | static u64 construct_eptp(unsigned long root_hpa); |
196 | static void kvm_cpu_vmxon(u64 addr); | 602 | static void kvm_cpu_vmxon(u64 addr); |
197 | static void kvm_cpu_vmxoff(void); | 603 | static void kvm_cpu_vmxoff(void); |
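Note: the FIELD/FIELD64 macros above build a table mapping VMCS field encodings to byte offsets inside struct vmcs12, which is how the emulated VMREAD/VMWRITE will locate fields; a table value of 0 marks an unsupported encoding. A standalone sketch of the same technique on a toy struct (field encodings here are made up, and the +1 below stands in for the kernel's pasted *_HIGH enumerator, since a 64-bit field's high-half encoding is the base encoding plus one):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_vmcs12 {
        uint32_t revision_id;           /* not reachable through the table */
        uint32_t abort;
        uint64_t tsc_offset;
        uint16_t guest_cs_selector;
};

#define DEMO_OFFSET(x)          offsetof(struct demo_vmcs12, x)
#define FIELD(number, name)     [number] = DEMO_OFFSET(name)
#define FIELD64(number, name)   [number] = DEMO_OFFSET(name), \
                                [(number) + 1] = DEMO_OFFSET(name) + 4

enum { DEMO_GUEST_CS = 0, DEMO_TSC_OFFSET = 2, DEMO_TSC_OFFSET_HIGH = 3 };

static const unsigned short demo_field_to_offset_table[] = {
        FIELD(DEMO_GUEST_CS, guest_cs_selector),
        FIELD64(DEMO_TSC_OFFSET, tsc_offset),
};

static short demo_field_to_offset(unsigned long field)
{
        if (field >= sizeof(demo_field_to_offset_table) /
                     sizeof(demo_field_to_offset_table[0]) ||
            demo_field_to_offset_table[field] == 0)
                return -1;      /* hole in the table or out of range */
        return demo_field_to_offset_table[field];
}

int main(void)
{
        printf("cs %d, tsc low %d, tsc high %d, unknown %d\n",
               demo_field_to_offset(DEMO_GUEST_CS),
               demo_field_to_offset(DEMO_TSC_OFFSET),
               demo_field_to_offset(DEMO_TSC_OFFSET_HIGH),
               demo_field_to_offset(1));
        return 0;
}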
@@ -200,7 +606,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | |||
200 | 606 | ||
201 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 607 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
202 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 608 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
203 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 609 | /* |
610 | * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed | ||
611 | * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. | ||
612 | */ | ||
613 | static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); | ||
204 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | 614 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); |
205 | 615 | ||
206 | static unsigned long *vmx_io_bitmap_a; | 616 | static unsigned long *vmx_io_bitmap_a; |
@@ -442,6 +852,35 @@ static inline bool report_flexpriority(void) | |||
442 | return flexpriority_enabled; | 852 | return flexpriority_enabled; |
443 | } | 853 | } |
444 | 854 | ||
855 | static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) | ||
856 | { | ||
857 | return vmcs12->cpu_based_vm_exec_control & bit; | ||
858 | } | ||
859 | |||
860 | static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) | ||
861 | { | ||
862 | return (vmcs12->cpu_based_vm_exec_control & | ||
863 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && | ||
864 | (vmcs12->secondary_vm_exec_control & bit); | ||
865 | } | ||
866 | |||
867 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12, | ||
868 | struct kvm_vcpu *vcpu) | ||
869 | { | ||
870 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | ||
871 | } | ||
872 | |||
873 | static inline bool is_exception(u32 intr_info) | ||
874 | { | ||
875 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | ||
876 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); | ||
877 | } | ||
878 | |||
879 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); | ||
880 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | ||
881 | struct vmcs12 *vmcs12, | ||
882 | u32 reason, unsigned long qualification); | ||
883 | |||
445 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | 884 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) |
446 | { | 885 | { |
447 | int i; | 886 | int i; |
@@ -501,6 +940,13 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
501 | vmcs, phys_addr); | 940 | vmcs, phys_addr); |
502 | } | 941 | } |
503 | 942 | ||
943 | static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) | ||
944 | { | ||
945 | vmcs_clear(loaded_vmcs->vmcs); | ||
946 | loaded_vmcs->cpu = -1; | ||
947 | loaded_vmcs->launched = 0; | ||
948 | } | ||
949 | |||
504 | static void vmcs_load(struct vmcs *vmcs) | 950 | static void vmcs_load(struct vmcs *vmcs) |
505 | { | 951 | { |
506 | u64 phys_addr = __pa(vmcs); | 952 | u64 phys_addr = __pa(vmcs); |
@@ -510,29 +956,28 @@ static void vmcs_load(struct vmcs *vmcs) | |||
510 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) | 956 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
511 | : "cc", "memory"); | 957 | : "cc", "memory"); |
512 | if (error) | 958 | if (error) |
513 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | 959 | printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", |
514 | vmcs, phys_addr); | 960 | vmcs, phys_addr); |
515 | } | 961 | } |
516 | 962 | ||
517 | static void __vcpu_clear(void *arg) | 963 | static void __loaded_vmcs_clear(void *arg) |
518 | { | 964 | { |
519 | struct vcpu_vmx *vmx = arg; | 965 | struct loaded_vmcs *loaded_vmcs = arg; |
520 | int cpu = raw_smp_processor_id(); | 966 | int cpu = raw_smp_processor_id(); |
521 | 967 | ||
522 | if (vmx->vcpu.cpu == cpu) | 968 | if (loaded_vmcs->cpu != cpu) |
523 | vmcs_clear(vmx->vmcs); | 969 | return; /* vcpu migration can race with cpu offline */ |
524 | if (per_cpu(current_vmcs, cpu) == vmx->vmcs) | 970 | if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) |
525 | per_cpu(current_vmcs, cpu) = NULL; | 971 | per_cpu(current_vmcs, cpu) = NULL; |
526 | list_del(&vmx->local_vcpus_link); | 972 | list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); |
527 | vmx->vcpu.cpu = -1; | 973 | loaded_vmcs_init(loaded_vmcs); |
528 | vmx->launched = 0; | ||
529 | } | 974 | } |
530 | 975 | ||
531 | static void vcpu_clear(struct vcpu_vmx *vmx) | 976 | static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) |
532 | { | 977 | { |
533 | if (vmx->vcpu.cpu == -1) | 978 | if (loaded_vmcs->cpu != -1) |
534 | return; | 979 | smp_call_function_single( |
535 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); | 980 | loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1); |
536 | } | 981 | } |
537 | 982 | ||
538 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) | 983 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) |
@@ -585,26 +1030,26 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | |||
585 | } | 1030 | } |
586 | } | 1031 | } |
587 | 1032 | ||
588 | static unsigned long vmcs_readl(unsigned long field) | 1033 | static __always_inline unsigned long vmcs_readl(unsigned long field) |
589 | { | 1034 | { |
590 | unsigned long value = 0; | 1035 | unsigned long value; |
591 | 1036 | ||
592 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) | 1037 | asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0") |
593 | : "+a"(value) : "d"(field) : "cc"); | 1038 | : "=a"(value) : "d"(field) : "cc"); |
594 | return value; | 1039 | return value; |
595 | } | 1040 | } |
596 | 1041 | ||
597 | static u16 vmcs_read16(unsigned long field) | 1042 | static __always_inline u16 vmcs_read16(unsigned long field) |
598 | { | 1043 | { |
599 | return vmcs_readl(field); | 1044 | return vmcs_readl(field); |
600 | } | 1045 | } |
601 | 1046 | ||
602 | static u32 vmcs_read32(unsigned long field) | 1047 | static __always_inline u32 vmcs_read32(unsigned long field) |
603 | { | 1048 | { |
604 | return vmcs_readl(field); | 1049 | return vmcs_readl(field); |
605 | } | 1050 | } |
606 | 1051 | ||
607 | static u64 vmcs_read64(unsigned long field) | 1052 | static __always_inline u64 vmcs_read64(unsigned long field) |
608 | { | 1053 | { |
609 | #ifdef CONFIG_X86_64 | 1054 | #ifdef CONFIG_X86_64 |
610 | return vmcs_readl(field); | 1055 | return vmcs_readl(field); |
@@ -731,6 +1176,15 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
731 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 1176 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
732 | if (vcpu->fpu_active) | 1177 | if (vcpu->fpu_active) |
733 | eb &= ~(1u << NM_VECTOR); | 1178 | eb &= ~(1u << NM_VECTOR); |
1179 | |||
1180 | /* When we are running a nested L2 guest and L1 specified for it a | ||
1181 | * certain exception bitmap, we must trap the same exceptions and pass | ||
1182 | * them to L1. When running L2, we will only handle the exceptions | ||
1183 | * specified above if L1 did not want them. | ||
1184 | */ | ||
1185 | if (is_guest_mode(vcpu)) | ||
1186 | eb |= get_vmcs12(vcpu)->exception_bitmap; | ||
1187 | |||
734 | vmcs_write32(EXCEPTION_BITMAP, eb); | 1188 | vmcs_write32(EXCEPTION_BITMAP, eb); |
735 | } | 1189 | } |
736 | 1190 | ||
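Note: as the comment in the hunk above says, while L2 runs the hardware exception bitmap is L0's own bits OR'ed with whatever L1 asked for in vmcs12->exception_bitmap, so L1 still sees the exceptions it wanted to intercept. In bitmap terms (vector numbers are the architectural ones):

#include <stdio.h>
#include <stdint.h>

#define NM_VECTOR 7     /* device-not-available, wanted by L0 for lazy FPU */
#define PF_VECTOR 14    /* page fault, requested by L1 in this example */

int main(void)
{
        uint32_t l0_bits = 1u << NM_VECTOR;              /* what L0 keeps for itself */
        uint32_t vmcs12_exception_bitmap = 1u << PF_VECTOR;  /* what L1 asked for */
        uint32_t eb = l0_bits | vmcs12_exception_bitmap; /* written to EXCEPTION_BITMAP */

        printf("exception bitmap = %#x\n", eb);
        return 0;
}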
@@ -971,22 +1425,22 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
971 | 1425 | ||
972 | if (!vmm_exclusive) | 1426 | if (!vmm_exclusive) |
973 | kvm_cpu_vmxon(phys_addr); | 1427 | kvm_cpu_vmxon(phys_addr); |
974 | else if (vcpu->cpu != cpu) | 1428 | else if (vmx->loaded_vmcs->cpu != cpu) |
975 | vcpu_clear(vmx); | 1429 | loaded_vmcs_clear(vmx->loaded_vmcs); |
976 | 1430 | ||
977 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 1431 | if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { |
978 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 1432 | per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; |
979 | vmcs_load(vmx->vmcs); | 1433 | vmcs_load(vmx->loaded_vmcs->vmcs); |
980 | } | 1434 | } |
981 | 1435 | ||
982 | if (vcpu->cpu != cpu) { | 1436 | if (vmx->loaded_vmcs->cpu != cpu) { |
983 | struct desc_ptr *gdt = &__get_cpu_var(host_gdt); | 1437 | struct desc_ptr *gdt = &__get_cpu_var(host_gdt); |
984 | unsigned long sysenter_esp; | 1438 | unsigned long sysenter_esp; |
985 | 1439 | ||
986 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 1440 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
987 | local_irq_disable(); | 1441 | local_irq_disable(); |
988 | list_add(&vmx->local_vcpus_link, | 1442 | list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, |
989 | &per_cpu(vcpus_on_cpu, cpu)); | 1443 | &per_cpu(loaded_vmcss_on_cpu, cpu)); |
990 | local_irq_enable(); | 1444 | local_irq_enable(); |
991 | 1445 | ||
992 | /* | 1446 | /* |
@@ -998,6 +1452,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
998 | 1452 | ||
999 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 1453 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
1000 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 1454 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
1455 | vmx->loaded_vmcs->cpu = cpu; | ||
1001 | } | 1456 | } |
1002 | } | 1457 | } |
1003 | 1458 | ||
@@ -1005,7 +1460,8 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
1005 | { | 1460 | { |
1006 | __vmx_load_host_state(to_vmx(vcpu)); | 1461 | __vmx_load_host_state(to_vmx(vcpu)); |
1007 | if (!vmm_exclusive) { | 1462 | if (!vmm_exclusive) { |
1008 | __vcpu_clear(to_vmx(vcpu)); | 1463 | __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); |
1464 | vcpu->cpu = -1; | ||
1009 | kvm_cpu_vmxoff(); | 1465 | kvm_cpu_vmxoff(); |
1010 | } | 1466 | } |
1011 | } | 1467 | } |
@@ -1023,19 +1479,55 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | |||
1023 | vmcs_writel(GUEST_CR0, cr0); | 1479 | vmcs_writel(GUEST_CR0, cr0); |
1024 | update_exception_bitmap(vcpu); | 1480 | update_exception_bitmap(vcpu); |
1025 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | 1481 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; |
1482 | if (is_guest_mode(vcpu)) | ||
1483 | vcpu->arch.cr0_guest_owned_bits &= | ||
1484 | ~get_vmcs12(vcpu)->cr0_guest_host_mask; | ||
1026 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | 1485 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); |
1027 | } | 1486 | } |
1028 | 1487 | ||
1029 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | 1488 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); |
1030 | 1489 | ||
1490 | /* | ||
1491 | * Return the cr0 value that a nested guest would read. This is a combination | ||
1492 | * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by | ||
1493 | * its hypervisor (cr0_read_shadow). | ||
1494 | */ | ||
1495 | static inline unsigned long nested_read_cr0(struct vmcs12 *fields) | ||
1496 | { | ||
1497 | return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) | | ||
1498 | (fields->cr0_read_shadow & fields->cr0_guest_host_mask); | ||
1499 | } | ||
1500 | static inline unsigned long nested_read_cr4(struct vmcs12 *fields) | ||
1501 | { | ||
1502 | return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) | | ||
1503 | (fields->cr4_read_shadow & fields->cr4_guest_host_mask); | ||
1504 | } | ||
1505 | |||
1031 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | 1506 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) |
1032 | { | 1507 | { |
1508 | /* Note that there is no vcpu->fpu_active = 0 here. The caller must | ||
1509 | * set this *before* calling this function. | ||
1510 | */ | ||
1033 | vmx_decache_cr0_guest_bits(vcpu); | 1511 | vmx_decache_cr0_guest_bits(vcpu); |
1034 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); | 1512 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); |
1035 | update_exception_bitmap(vcpu); | 1513 | update_exception_bitmap(vcpu); |
1036 | vcpu->arch.cr0_guest_owned_bits = 0; | 1514 | vcpu->arch.cr0_guest_owned_bits = 0; |
1037 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | 1515 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); |
1038 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | 1516 | if (is_guest_mode(vcpu)) { |
1517 | /* | ||
1518 | * L1's specified read shadow might not contain the TS bit, | ||
1519 | * so now that we turned on shadowing of this bit, we need to | ||
1520 | * set this bit of the shadow. Like in nested_vmx_run we need | ||
1521 | * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet | ||
1522 | * up-to-date here because we just decached cr0.TS (and we'll | ||
1523 | * only update vmcs12->guest_cr0 on nested exit). | ||
1524 | */ | ||
1525 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
1526 | vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) | | ||
1527 | (vcpu->arch.cr0 & X86_CR0_TS); | ||
1528 | vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); | ||
1529 | } else | ||
1530 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
1039 | } | 1531 | } |
1040 | 1532 | ||
1041 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 1533 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
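Note: nested_read_cr0()/nested_read_cr4() added above compute the cr0/cr4 value a nested guest would observe: bits covered by the guest/host mask are owned by L1 and read from the shadow, the remaining bits come from the value the guest actually runs with. A standalone rendering of the same combination:

#include <stdio.h>

static unsigned long demo_nested_read_cr0(unsigned long guest_cr0,
                                          unsigned long read_shadow,
                                          unsigned long guest_host_mask)
{
        /* mask bit 1: L1 owns the bit, reads see the shadow;
         * mask bit 0: reads see the real guest_cr0 bit */
        return (guest_cr0 & ~guest_host_mask) |
               (read_shadow & guest_host_mask);
}

int main(void)
{
        /* CR0.TS is bit 3 (0x8); it is owned by L1 here and clear in the
         * shadow, so the guest reads TS=0 even though the real cr0 has TS=1 */
        printf("%#lx\n", demo_nested_read_cr0(0x80000008UL, 0x0UL, 0x8UL));
        return 0;
}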
@@ -1119,6 +1611,25 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | |||
1119 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | 1611 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); |
1120 | } | 1612 | } |
1121 | 1613 | ||
1614 | /* | ||
1615 | * KVM wants to inject page-faults which it got to the guest. This function | ||
1616 | * checks whether in a nested guest, we need to inject them to L1 or L2. | ||
1617 | * This function assumes it is called with the exit reason in vmcs02 being | ||
1618 | * a #PF exception (this is the only case in which KVM injects a #PF when L2 | ||
1619 | * is running). | ||
1620 | */ | ||
1621 | static int nested_pf_handled(struct kvm_vcpu *vcpu) | ||
1622 | { | ||
1623 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
1624 | |||
1625 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | ||
1626 | if (!(vmcs12->exception_bitmap & PF_VECTOR)) | ||
1627 | return 0; | ||
1628 | |||
1629 | nested_vmx_vmexit(vcpu); | ||
1630 | return 1; | ||
1631 | } | ||
1632 | |||
1122 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 1633 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
1123 | bool has_error_code, u32 error_code, | 1634 | bool has_error_code, u32 error_code, |
1124 | bool reinject) | 1635 | bool reinject) |
@@ -1126,6 +1637,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1126 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1637 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1127 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 1638 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
1128 | 1639 | ||
1640 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && | ||
1641 | nested_pf_handled(vcpu)) | ||
1642 | return; | ||
1643 | |||
1129 | if (has_error_code) { | 1644 | if (has_error_code) { |
1130 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | 1645 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); |
1131 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 1646 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
@@ -1248,12 +1763,24 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | |||
1248 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | 1763 | static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) |
1249 | { | 1764 | { |
1250 | vmcs_write64(TSC_OFFSET, offset); | 1765 | vmcs_write64(TSC_OFFSET, offset); |
1766 | if (is_guest_mode(vcpu)) | ||
1767 | /* | ||
1768 | * We're here if L1 chose not to trap the TSC MSR. Since | ||
1769 | * prepare_vmcs12() does not copy tsc_offset, we need to also | ||
1770 | * set the vmcs12 field here. | ||
1771 | */ | ||
1772 | get_vmcs12(vcpu)->tsc_offset = offset - | ||
1773 | to_vmx(vcpu)->nested.vmcs01_tsc_offset; | ||
1251 | } | 1774 | } |
1252 | 1775 | ||
1253 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1776 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) |
1254 | { | 1777 | { |
1255 | u64 offset = vmcs_read64(TSC_OFFSET); | 1778 | u64 offset = vmcs_read64(TSC_OFFSET); |
1256 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 1779 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
1780 | if (is_guest_mode(vcpu)) { | ||
1781 | /* Even when running L2, the adjustment needs to apply to L1 */ | ||
1782 | to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; | ||
1783 | } | ||
1257 | } | 1784 | } |
1258 | 1785 | ||
1259 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | 1786 | static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) |
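Note: per the comments in the hunk above, while L2 runs the TSC_OFFSET programmed into hardware is understood as L1's component (vmcs01_tsc_offset) plus the vmcs12 component, so an absolute offset written while L2 is active has to be stored in vmcs12 as the difference from L1's part, and an adjustment has to be credited to L1's part as well. The bookkeeping in isolation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int64_t vmcs01_tsc_offset = 1000;   /* L0's offset for running L1 */
        int64_t new_hw_offset = 1500;       /* value just written while L2 runs */
        int64_t vmcs12_tsc_offset;          /* what L1 appears to have asked for */

        /* mirrors: get_vmcs12(vcpu)->tsc_offset = offset - nested.vmcs01_tsc_offset */
        vmcs12_tsc_offset = new_hw_offset - vmcs01_tsc_offset;
        printf("vmcs12 tsc_offset = %lld\n", (long long)vmcs12_tsc_offset);
        return 0;
}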
@@ -1261,6 +1788,236 @@ static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) | |||
1261 | return target_tsc - native_read_tsc(); | 1788 | return target_tsc - native_read_tsc(); |
1262 | } | 1789 | } |
1263 | 1790 | ||
1791 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | ||
1792 | { | ||
1793 | struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
1794 | return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31))); | ||
1795 | } | ||
1796 | |||
1797 | /* | ||
1798 | * nested_vmx_allowed() checks whether a guest should be allowed to use VMX | ||
1799 | * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for | ||
1800 | * all guests if the "nested" module option is off, and can also be disabled | ||
1801 | * for a single guest by disabling its VMX cpuid bit. | ||
1802 | */ | ||
1803 | static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | ||
1804 | { | ||
1805 | return nested && guest_cpuid_has_vmx(vcpu); | ||
1806 | } | ||
1807 | |||
1808 | /* | ||
1809 | * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be | ||
1810 | * returned for the various VMX controls MSRs when nested VMX is enabled. | ||
1811 | * The same values should also be used to verify that vmcs12 control fields are | ||
1812 | * valid during nested entry from L1 to L2. | ||
1813 | * Each of these control msrs has a low and high 32-bit half: A low bit is on | ||
1814 | * if the corresponding bit in the (32-bit) control field *must* be on, and a | ||
1815 | * bit in the high half is on if the corresponding bit in the control field | ||
1816 | * may be on. See also vmx_control_verify(). | ||
1817 | * TODO: allow these variables to be modified (downgraded) by module options | ||
1818 | * or other means. | ||
1819 | */ | ||
1820 | static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; | ||
1821 | static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | ||
1822 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | ||
1823 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | ||
1824 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | ||
1825 | static __init void nested_vmx_setup_ctls_msrs(void) | ||
1826 | { | ||
1827 | /* | ||
1828 | * Note that as a general rule, the high half of the MSRs (bits in | ||
1829 | * the control fields which may be 1) should be initialized by the | ||
1830 | * intersection of the underlying hardware's MSR (i.e., features which | ||
1831 | * can be supported) and the list of features we want to expose - | ||
1832 | * because they are known to be properly supported in our code. | ||
1833 | * Also, usually, the low half of the MSRs (bits which must be 1) can | ||
1834 | * be set to 0, meaning that L1 may turn off any of these bits. The | ||
1835 | * reason is that if one of these bits is necessary, it will appear | ||
1836 | * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control | ||
1837 | * fields of vmcs01 and vmcs02, will turn these bits off - and | ||
1838 | * nested_vmx_exit_handled() will not pass related exits to L1. | ||
1839 | * These rules have exceptions below. | ||
1840 | */ | ||
1841 | |||
1842 | /* pin-based controls */ | ||
1843 | /* | ||
1844 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is | ||
1845 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. | ||
1846 | */ | ||
1847 | nested_vmx_pinbased_ctls_low = 0x16 ; | ||
1848 | nested_vmx_pinbased_ctls_high = 0x16 | | ||
1849 | PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | | ||
1850 | PIN_BASED_VIRTUAL_NMIS; | ||
1851 | |||
1852 | /* exit controls */ | ||
1853 | nested_vmx_exit_ctls_low = 0; | ||
1854 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | ||
1855 | #ifdef CONFIG_X86_64 | ||
1856 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | ||
1857 | #else | ||
1858 | nested_vmx_exit_ctls_high = 0; | ||
1859 | #endif | ||
1860 | |||
1861 | /* entry controls */ | ||
1862 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | ||
1863 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | ||
1864 | nested_vmx_entry_ctls_low = 0; | ||
1865 | nested_vmx_entry_ctls_high &= | ||
1866 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | ||
1867 | |||
1868 | /* cpu-based controls */ | ||
1869 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | ||
1870 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); | ||
1871 | nested_vmx_procbased_ctls_low = 0; | ||
1872 | nested_vmx_procbased_ctls_high &= | ||
1873 | CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING | | ||
1874 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | | ||
1875 | CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING | | ||
1876 | CPU_BASED_CR3_STORE_EXITING | | ||
1877 | #ifdef CONFIG_X86_64 | ||
1878 | CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | | ||
1879 | #endif | ||
1880 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | ||
1881 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | ||
1882 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | ||
1883 | /* | ||
1884 | * We can allow some features even when not supported by the | ||
1885 | * hardware. For example, L1 can specify an MSR bitmap - and we | ||
1886 | * can use it to avoid exits to L1 - even when L0 runs L2 | ||
1887 | * without MSR bitmaps. | ||
1888 | */ | ||
1889 | nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; | ||
1890 | |||
1891 | /* secondary cpu-based controls */ | ||
1892 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | ||
1893 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); | ||
1894 | nested_vmx_secondary_ctls_low = 0; | ||
1895 | nested_vmx_secondary_ctls_high &= | ||
1896 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
1897 | } | ||
1898 | |||
1899 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) | ||
1900 | { | ||
1901 | /* | ||
1902 | * Bits that are 0 in high must be 0 in control; bits that are 1 in low must be 1. | ||
1903 | */ | ||
1904 | return ((control & high) | low) == control; | ||
1905 | } | ||
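As an aside on the low/high convention described earlier for these control MSRs (a bit in the low half means the corresponding control bit must be 1, a bit in the high half means it may be 1), the predicate used by vmx_control_verify() above can be exercised on its own. The following user-space sketch is editorial, not part of the patch, and the MSR halves it feeds in are made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Same predicate as vmx_control_verify(): every set bit of the control
     * must be allowed by 'high', and every bit required by 'low' must be set. */
    static int control_ok(uint32_t control, uint32_t low, uint32_t high)
    {
            return ((control & high) | low) == control;
    }

    int main(void)
    {
            uint32_t low = 0x16, high = 0xff;              /* hypothetical MSR halves */

            printf("%d\n", control_ok(0x016, low, high));  /* 1: exactly the required bits */
            printf("%d\n", control_ok(0x116, low, high));  /* 0: bit 8 not allowed by high */
            printf("%d\n", control_ok(0x012, low, high));  /* 0: required bit 2 is missing */
            return 0;
    }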
1906 | |||
1907 | static inline u64 vmx_control_msr(u32 low, u32 high) | ||
1908 | { | ||
1909 | return low | ((u64)high << 32); | ||
1910 | } | ||
1911 | |||
1912 | /* | ||
1913 | * If we allow our guest to use VMX instructions (i.e., nested VMX), we should | ||
1914 | * also let it use VMX-specific MSRs. | ||
1915 | * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a | ||
1916 | * VMX-specific MSR, or 0 when we haven't (and the caller should handle it | ||
1917 | * like all other MSRs). | ||
1918 | */ | ||
1919 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | ||
1920 | { | ||
1921 | if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && | ||
1922 | msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { | ||
1923 | /* | ||
1924 | * According to the spec, processors which do not support VMX | ||
1925 | * should throw a #GP(0) when VMX capability MSRs are read. | ||
1926 | */ | ||
1927 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
1928 | return 1; | ||
1929 | } | ||
1930 | |||
1931 | switch (msr_index) { | ||
1932 | case MSR_IA32_FEATURE_CONTROL: | ||
1933 | *pdata = 0; | ||
1934 | break; | ||
1935 | case MSR_IA32_VMX_BASIC: | ||
1936 | /* | ||
1937 | * This MSR reports some information about VMX support. We | ||
1938 | * should return information about the VMX we emulate for the | ||
1939 | * guest, and the VMCS structure we give it - not about the | ||
1940 | * VMX support of the underlying hardware. | ||
1941 | */ | ||
1942 | *pdata = VMCS12_REVISION | | ||
1943 | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | | ||
1944 | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); | ||
1945 | break; | ||
1946 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: | ||
1947 | case MSR_IA32_VMX_PINBASED_CTLS: | ||
1948 | *pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low, | ||
1949 | nested_vmx_pinbased_ctls_high); | ||
1950 | break; | ||
1951 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: | ||
1952 | case MSR_IA32_VMX_PROCBASED_CTLS: | ||
1953 | *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, | ||
1954 | nested_vmx_procbased_ctls_high); | ||
1955 | break; | ||
1956 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: | ||
1957 | case MSR_IA32_VMX_EXIT_CTLS: | ||
1958 | *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, | ||
1959 | nested_vmx_exit_ctls_high); | ||
1960 | break; | ||
1961 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: | ||
1962 | case MSR_IA32_VMX_ENTRY_CTLS: | ||
1963 | *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, | ||
1964 | nested_vmx_entry_ctls_high); | ||
1965 | break; | ||
1966 | case MSR_IA32_VMX_MISC: | ||
1967 | *pdata = 0; | ||
1968 | break; | ||
1969 | /* | ||
1970 | * These MSRs specify bits which the guest must keep fixed (on or off) | ||
1971 | * while L1 is in VMXON mode (in L1's root mode, or running an L2). | ||
1972 | * We picked the standard core2 setting. | ||
1973 | */ | ||
1974 | #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) | ||
1975 | #define VMXON_CR4_ALWAYSON X86_CR4_VMXE | ||
1976 | case MSR_IA32_VMX_CR0_FIXED0: | ||
1977 | *pdata = VMXON_CR0_ALWAYSON; | ||
1978 | break; | ||
1979 | case MSR_IA32_VMX_CR0_FIXED1: | ||
1980 | *pdata = -1ULL; | ||
1981 | break; | ||
1982 | case MSR_IA32_VMX_CR4_FIXED0: | ||
1983 | *pdata = VMXON_CR4_ALWAYSON; | ||
1984 | break; | ||
1985 | case MSR_IA32_VMX_CR4_FIXED1: | ||
1986 | *pdata = -1ULL; | ||
1987 | break; | ||
1988 | case MSR_IA32_VMX_VMCS_ENUM: | ||
1989 | *pdata = 0x1f; | ||
1990 | break; | ||
1991 | case MSR_IA32_VMX_PROCBASED_CTLS2: | ||
1992 | *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, | ||
1993 | nested_vmx_secondary_ctls_high); | ||
1994 | break; | ||
1995 | case MSR_IA32_VMX_EPT_VPID_CAP: | ||
1996 | /* Currently, no nested ept or nested vpid */ | ||
1997 | *pdata = 0; | ||
1998 | break; | ||
1999 | default: | ||
2000 | return 0; | ||
2001 | } | ||
2002 | |||
2003 | return 1; | ||
2004 | } | ||
2005 | |||
2006 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | ||
2007 | { | ||
2008 | if (!nested_vmx_allowed(vcpu)) | ||
2009 | return 0; | ||
2010 | |||
2011 | if (msr_index == MSR_IA32_FEATURE_CONTROL) | ||
2012 | /* TODO: the right thing. */ | ||
2013 | return 1; | ||
2014 | /* | ||
2015 | * No need to treat VMX capability MSRs specially: If we don't handle | ||
2016 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | ||
2017 | */ | ||
2018 | return 0; | ||
2019 | } | ||
2020 | |||
1264 | /* | 2021 | /* |
1265 | * Reads an msr value (of 'msr_index') into 'pdata'. | 2022 | * Reads an msr value (of 'msr_index') into 'pdata'. |
1266 | * Returns 0 on success, non-0 otherwise. | 2023 | * Returns 0 on success, non-0 otherwise. |
@@ -1309,6 +2066,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1309 | /* Otherwise falls through */ | 2066 | /* Otherwise falls through */ |
1310 | default: | 2067 | default: |
1311 | vmx_load_host_state(to_vmx(vcpu)); | 2068 | vmx_load_host_state(to_vmx(vcpu)); |
2069 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | ||
2070 | return 0; | ||
1312 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2071 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
1313 | if (msr) { | 2072 | if (msr) { |
1314 | vmx_load_host_state(to_vmx(vcpu)); | 2073 | vmx_load_host_state(to_vmx(vcpu)); |
@@ -1380,6 +2139,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1380 | return 1; | 2139 | return 1; |
1381 | /* Otherwise falls through */ | 2140 | /* Otherwise falls through */ |
1382 | default: | 2141 | default: |
2142 | if (vmx_set_vmx_msr(vcpu, msr_index, data)) | ||
2143 | break; | ||
1383 | msr = find_msr_entry(vmx, msr_index); | 2144 | msr = find_msr_entry(vmx, msr_index); |
1384 | if (msr) { | 2145 | if (msr) { |
1385 | vmx_load_host_state(vmx); | 2146 | vmx_load_host_state(vmx); |
@@ -1469,7 +2230,7 @@ static int hardware_enable(void *garbage) | |||
1469 | if (read_cr4() & X86_CR4_VMXE) | 2230 | if (read_cr4() & X86_CR4_VMXE) |
1470 | return -EBUSY; | 2231 | return -EBUSY; |
1471 | 2232 | ||
1472 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 2233 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); |
1473 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 2234 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1474 | 2235 | ||
1475 | test_bits = FEATURE_CONTROL_LOCKED; | 2236 | test_bits = FEATURE_CONTROL_LOCKED; |
@@ -1493,14 +2254,14 @@ static int hardware_enable(void *garbage) | |||
1493 | return 0; | 2254 | return 0; |
1494 | } | 2255 | } |
1495 | 2256 | ||
1496 | static void vmclear_local_vcpus(void) | 2257 | static void vmclear_local_loaded_vmcss(void) |
1497 | { | 2258 | { |
1498 | int cpu = raw_smp_processor_id(); | 2259 | int cpu = raw_smp_processor_id(); |
1499 | struct vcpu_vmx *vmx, *n; | 2260 | struct loaded_vmcs *v, *n; |
1500 | 2261 | ||
1501 | list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu), | 2262 | list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), |
1502 | local_vcpus_link) | 2263 | loaded_vmcss_on_cpu_link) |
1503 | __vcpu_clear(vmx); | 2264 | __loaded_vmcs_clear(v); |
1504 | } | 2265 | } |
1505 | 2266 | ||
1506 | 2267 | ||
@@ -1515,7 +2276,7 @@ static void kvm_cpu_vmxoff(void) | |||
1515 | static void hardware_disable(void *garbage) | 2276 | static void hardware_disable(void *garbage) |
1516 | { | 2277 | { |
1517 | if (vmm_exclusive) { | 2278 | if (vmm_exclusive) { |
1518 | vmclear_local_vcpus(); | 2279 | vmclear_local_loaded_vmcss(); |
1519 | kvm_cpu_vmxoff(); | 2280 | kvm_cpu_vmxoff(); |
1520 | } | 2281 | } |
1521 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | 2282 | write_cr4(read_cr4() & ~X86_CR4_VMXE); |
@@ -1696,6 +2457,18 @@ static void free_vmcs(struct vmcs *vmcs) | |||
1696 | free_pages((unsigned long)vmcs, vmcs_config.order); | 2457 | free_pages((unsigned long)vmcs, vmcs_config.order); |
1697 | } | 2458 | } |
1698 | 2459 | ||
2460 | /* | ||
2461 | * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded | ||
2462 | */ | ||
2463 | static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | ||
2464 | { | ||
2465 | if (!loaded_vmcs->vmcs) | ||
2466 | return; | ||
2467 | loaded_vmcs_clear(loaded_vmcs); | ||
2468 | free_vmcs(loaded_vmcs->vmcs); | ||
2469 | loaded_vmcs->vmcs = NULL; | ||
2470 | } | ||
2471 | |||
1699 | static void free_kvm_area(void) | 2472 | static void free_kvm_area(void) |
1700 | { | 2473 | { |
1701 | int cpu; | 2474 | int cpu; |
@@ -1756,6 +2529,9 @@ static __init int hardware_setup(void) | |||
1756 | if (!cpu_has_vmx_ple()) | 2529 | if (!cpu_has_vmx_ple()) |
1757 | ple_gap = 0; | 2530 | ple_gap = 0; |
1758 | 2531 | ||
2532 | if (nested) | ||
2533 | nested_vmx_setup_ctls_msrs(); | ||
2534 | |||
1759 | return alloc_kvm_area(); | 2535 | return alloc_kvm_area(); |
1760 | } | 2536 | } |
1761 | 2537 | ||
@@ -2041,7 +2817,7 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) | |||
2041 | (unsigned long *)&vcpu->arch.regs_dirty); | 2817 | (unsigned long *)&vcpu->arch.regs_dirty); |
2042 | } | 2818 | } |
2043 | 2819 | ||
2044 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 2820 | static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
2045 | 2821 | ||
2046 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | 2822 | static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, |
2047 | unsigned long cr0, | 2823 | unsigned long cr0, |
@@ -2139,11 +2915,23 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
2139 | vmcs_writel(GUEST_CR3, guest_cr3); | 2915 | vmcs_writel(GUEST_CR3, guest_cr3); |
2140 | } | 2916 | } |
2141 | 2917 | ||
2142 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 2918 | static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
2143 | { | 2919 | { |
2144 | unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? | 2920 | unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ? |
2145 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 2921 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
2146 | 2922 | ||
2923 | if (cr4 & X86_CR4_VMXE) { | ||
2924 | /* | ||
2925 | * To use VMXON (and later other VMX instructions), a guest | ||
2926 | * must first be able to turn on cr4.VMXE (see handle_vmon()). | ||
2927 | * So basically the check on whether to allow nested VMX | ||
2928 | * is here. | ||
2929 | */ | ||
2930 | if (!nested_vmx_allowed(vcpu)) | ||
2931 | return 1; | ||
2932 | } else if (to_vmx(vcpu)->nested.vmxon) | ||
2933 | return 1; | ||
2934 | |||
2147 | vcpu->arch.cr4 = cr4; | 2935 | vcpu->arch.cr4 = cr4; |
2148 | if (enable_ept) { | 2936 | if (enable_ept) { |
2149 | if (!is_paging(vcpu)) { | 2937 | if (!is_paging(vcpu)) { |
@@ -2156,6 +2944,7 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2156 | 2944 | ||
2157 | vmcs_writel(CR4_READ_SHADOW, cr4); | 2945 | vmcs_writel(CR4_READ_SHADOW, cr4); |
2158 | vmcs_writel(GUEST_CR4, hw_cr4); | 2946 | vmcs_writel(GUEST_CR4, hw_cr4); |
2947 | return 0; | ||
2159 | } | 2948 | } |
2160 | 2949 | ||
2161 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 2950 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
@@ -2721,18 +3510,110 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | |||
2721 | } | 3510 | } |
2722 | 3511 | ||
2723 | /* | 3512 | /* |
3513 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that | ||
3514 | * will not change in the lifetime of the guest. | ||
3515 | * Note that host-state that does change is set elsewhere. E.g., host-state | ||
3516 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. | ||
3517 | */ | ||
3518 | static void vmx_set_constant_host_state(void) | ||
3519 | { | ||
3520 | u32 low32, high32; | ||
3521 | unsigned long tmpl; | ||
3522 | struct desc_ptr dt; | ||
3523 | |||
3524 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ | ||
3525 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | ||
3526 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | ||
3527 | |||
3528 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ | ||
3529 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | ||
3530 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | ||
3531 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | ||
3532 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | ||
3533 | |||
3534 | native_store_idt(&dt); | ||
3535 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | ||
3536 | |||
3537 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); | ||
3538 | vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ | ||
3539 | |||
3540 | rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); | ||
3541 | vmcs_write32(HOST_IA32_SYSENTER_CS, low32); | ||
3542 | rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); | ||
3543 | vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ | ||
3544 | |||
3545 | if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { | ||
3546 | rdmsr(MSR_IA32_CR_PAT, low32, high32); | ||
3547 | vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); | ||
3548 | } | ||
3549 | } | ||
3550 | |||
3551 | static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) | ||
3552 | { | ||
3553 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; | ||
3554 | if (enable_ept) | ||
3555 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
3556 | if (is_guest_mode(&vmx->vcpu)) | ||
3557 | vmx->vcpu.arch.cr4_guest_owned_bits &= | ||
3558 | ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask; | ||
3559 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
3560 | } | ||
3561 | |||
3562 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | ||
3563 | { | ||
3564 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; | ||
3565 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { | ||
3566 | exec_control &= ~CPU_BASED_TPR_SHADOW; | ||
3567 | #ifdef CONFIG_X86_64 | ||
3568 | exec_control |= CPU_BASED_CR8_STORE_EXITING | | ||
3569 | CPU_BASED_CR8_LOAD_EXITING; | ||
3570 | #endif | ||
3571 | } | ||
3572 | if (!enable_ept) | ||
3573 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | ||
3574 | CPU_BASED_CR3_LOAD_EXITING | | ||
3575 | CPU_BASED_INVLPG_EXITING; | ||
3576 | return exec_control; | ||
3577 | } | ||
3578 | |||
3579 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | ||
3580 | { | ||
3581 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | ||
3582 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | ||
3583 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
3584 | if (vmx->vpid == 0) | ||
3585 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | ||
3586 | if (!enable_ept) { | ||
3587 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | ||
3588 | enable_unrestricted_guest = 0; | ||
3589 | } | ||
3590 | if (!enable_unrestricted_guest) | ||
3591 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | ||
3592 | if (!ple_gap) | ||
3593 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
3594 | return exec_control; | ||
3595 | } | ||
3596 | |||
3597 | static void ept_set_mmio_spte_mask(void) | ||
3598 | { | ||
3599 | /* | ||
3600 | * EPT Misconfigurations can be generated if the value of bits 2:0 | ||
3601 | * of an EPT paging-structure entry is 110b (write/execute). | ||
3602 | * Also, magic bits (0xffull << 49) is set to quickly identify mmio | ||
3603 | * spte. | ||
3604 | */ | ||
3605 | kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull); | ||
3606 | } | ||
3607 | |||
3608 | /* | ||
2724 | * Sets up the vmcs for emulated real mode. | 3609 | * Sets up the vmcs for emulated real mode. |
2725 | */ | 3610 | */ |
2726 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | 3611 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2727 | { | 3612 | { |
2728 | u32 host_sysenter_cs, msr_low, msr_high; | 3613 | #ifdef CONFIG_X86_64 |
2729 | u32 junk; | ||
2730 | u64 host_pat; | ||
2731 | unsigned long a; | 3614 | unsigned long a; |
2732 | struct desc_ptr dt; | 3615 | #endif |
2733 | int i; | 3616 | int i; |
2734 | unsigned long kvm_vmx_return; | ||
2735 | u32 exec_control; | ||
2736 | 3617 | ||
2737 | /* I/O */ | 3618 | /* I/O */ |
2738 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); | 3619 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
@@ -2747,36 +3628,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2747 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | 3628 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, |
2748 | vmcs_config.pin_based_exec_ctrl); | 3629 | vmcs_config.pin_based_exec_ctrl); |
2749 | 3630 | ||
2750 | exec_control = vmcs_config.cpu_based_exec_ctrl; | 3631 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
2751 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { | ||
2752 | exec_control &= ~CPU_BASED_TPR_SHADOW; | ||
2753 | #ifdef CONFIG_X86_64 | ||
2754 | exec_control |= CPU_BASED_CR8_STORE_EXITING | | ||
2755 | CPU_BASED_CR8_LOAD_EXITING; | ||
2756 | #endif | ||
2757 | } | ||
2758 | if (!enable_ept) | ||
2759 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | ||
2760 | CPU_BASED_CR3_LOAD_EXITING | | ||
2761 | CPU_BASED_INVLPG_EXITING; | ||
2762 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | ||
2763 | 3632 | ||
2764 | if (cpu_has_secondary_exec_ctrls()) { | 3633 | if (cpu_has_secondary_exec_ctrls()) { |
2765 | exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 3634 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, |
2766 | if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | 3635 | vmx_secondary_exec_control(vmx)); |
2767 | exec_control &= | ||
2768 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
2769 | if (vmx->vpid == 0) | ||
2770 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | ||
2771 | if (!enable_ept) { | ||
2772 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | ||
2773 | enable_unrestricted_guest = 0; | ||
2774 | } | ||
2775 | if (!enable_unrestricted_guest) | ||
2776 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | ||
2777 | if (!ple_gap) | ||
2778 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
2779 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
2780 | } | 3636 | } |
2781 | 3637 | ||
2782 | if (ple_gap) { | 3638 | if (ple_gap) { |
@@ -2784,20 +3640,13 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2784 | vmcs_write32(PLE_WINDOW, ple_window); | 3640 | vmcs_write32(PLE_WINDOW, ple_window); |
2785 | } | 3641 | } |
2786 | 3642 | ||
2787 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 3643 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); |
2788 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 3644 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); |
2789 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 3645 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
2790 | 3646 | ||
2791 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ | ||
2792 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | ||
2793 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | ||
2794 | |||
2795 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ | ||
2796 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | ||
2797 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | ||
2798 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ | 3647 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ |
2799 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ | 3648 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ |
2800 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 3649 | vmx_set_constant_host_state(); |
2801 | #ifdef CONFIG_X86_64 | 3650 | #ifdef CONFIG_X86_64 |
2802 | rdmsrl(MSR_FS_BASE, a); | 3651 | rdmsrl(MSR_FS_BASE, a); |
2803 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ | 3652 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ |
@@ -2808,32 +3657,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2808 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ | 3657 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ |
2809 | #endif | 3658 | #endif |
2810 | 3659 | ||
2811 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | ||
2812 | |||
2813 | native_store_idt(&dt); | ||
2814 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | ||
2815 | |||
2816 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | ||
2817 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | ||
2818 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 3660 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
2819 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 3661 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
2820 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | 3662 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); |
2821 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 3663 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
2822 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | 3664 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); |
2823 | 3665 | ||
2824 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | ||
2825 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | ||
2826 | rdmsrl(MSR_IA32_SYSENTER_ESP, a); | ||
2827 | vmcs_writel(HOST_IA32_SYSENTER_ESP, a); /* 22.2.3 */ | ||
2828 | rdmsrl(MSR_IA32_SYSENTER_EIP, a); | ||
2829 | vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ | ||
2830 | |||
2831 | if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { | ||
2832 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
2833 | host_pat = msr_low | ((u64) msr_high << 32); | ||
2834 | vmcs_write64(HOST_IA32_PAT, host_pat); | ||
2835 | } | ||
2836 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 3666 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
3667 | u32 msr_low, msr_high; | ||
3668 | u64 host_pat; | ||
2837 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | 3669 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); |
2838 | host_pat = msr_low | ((u64) msr_high << 32); | 3670 | host_pat = msr_low | ((u64) msr_high << 32); |
2839 | /* Write the default value following host pat */ | 3671 | /* Write the default value following host pat */ |
@@ -2863,10 +3695,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2863 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 3695 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
2864 | 3696 | ||
2865 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 3697 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
2866 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; | 3698 | set_cr4_guest_host_mask(vmx); |
2867 | if (enable_ept) | ||
2868 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
2869 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
2870 | 3699 | ||
2871 | kvm_write_tsc(&vmx->vcpu, 0); | 3700 | kvm_write_tsc(&vmx->vcpu, 0); |
2872 | 3701 | ||
@@ -2990,9 +3819,25 @@ out: | |||
2990 | return ret; | 3819 | return ret; |
2991 | } | 3820 | } |
2992 | 3821 | ||
3822 | /* | ||
3823 | * In nested virtualization, check if L1 asked to exit on external interrupts. | ||
3824 | * For most existing hypervisors, this will always return true. | ||
3825 | */ | ||
3826 | static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | ||
3827 | { | ||
3828 | return get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
3829 | PIN_BASED_EXT_INTR_MASK; | ||
3830 | } | ||
3831 | |||
2993 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3832 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
2994 | { | 3833 | { |
2995 | u32 cpu_based_vm_exec_control; | 3834 | u32 cpu_based_vm_exec_control; |
3835 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
3836 | /* We can get here when nested_run_pending caused | ||
3837 | * vmx_interrupt_allowed() to return false. In this case, do | ||
3838 | * nothing - the interrupt will be injected later. | ||
3839 | */ | ||
3840 | return; | ||
2996 | 3841 | ||
2997 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 3842 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
2998 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 3843 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
@@ -3049,6 +3894,9 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
3049 | { | 3894 | { |
3050 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3895 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3051 | 3896 | ||
3897 | if (is_guest_mode(vcpu)) | ||
3898 | return; | ||
3899 | |||
3052 | if (!cpu_has_virtual_nmis()) { | 3900 | if (!cpu_has_virtual_nmis()) { |
3053 | /* | 3901 | /* |
3054 | * Tracking the NMI-blocked state in software is built upon | 3902 | * Tracking the NMI-blocked state in software is built upon |
@@ -3115,6 +3963,17 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
3115 | 3963 | ||
3116 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 3964 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
3117 | { | 3965 | { |
3966 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | ||
3967 | struct vmcs12 *vmcs12; | ||
3968 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
3969 | return 0; | ||
3970 | nested_vmx_vmexit(vcpu); | ||
3971 | vmcs12 = get_vmcs12(vcpu); | ||
3972 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | ||
3973 | vmcs12->vm_exit_intr_info = 0; | ||
3974 | /* fall through to normal code, but now in L1, not L2 */ | ||
3975 | } | ||
3976 | |||
3118 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 3977 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
3119 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 3978 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
3120 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); | 3979 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); |
@@ -3356,6 +4215,58 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3356 | hypercall[2] = 0xc1; | 4215 | hypercall[2] = 0xc1; |
3357 | } | 4216 | } |
3358 | 4217 | ||
4218 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ | ||
4219 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | ||
4220 | { | ||
4221 | if (to_vmx(vcpu)->nested.vmxon && | ||
4222 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
4223 | return 1; | ||
4224 | |||
4225 | if (is_guest_mode(vcpu)) { | ||
4226 | /* | ||
4227 | * We get here when L2 changed cr0 in a way that did not change | ||
4228 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), | ||
4229 | * but did change L0 shadowed bits. This can currently happen | ||
4230 | * with the TS bit: L0 may want to leave TS on (for lazy fpu | ||
4231 | * loading) while pretending to allow the guest to change it. | ||
4232 | */ | ||
4233 | if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | | ||
4234 | (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) | ||
4235 | return 1; | ||
4236 | vmcs_writel(CR0_READ_SHADOW, val); | ||
4237 | return 0; | ||
4238 | } else | ||
4239 | return kvm_set_cr0(vcpu, val); | ||
4240 | } | ||
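In the is_guest_mode() path above, only the guest-owned CR0 bits from the value L2 just wrote are taken; every shadowed bit keeps its previously tracked value before the combined result is handed to kvm_set_cr0(). A minimal sketch of that masking follows; the masks and values are invented for illustration and are not taken from the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Combine a newly written CR0 value with the tracked one, honoring only
     * the guest-owned bits - the same arithmetic as the is_guest_mode()
     * branch of handle_set_cr0(). */
    static uint64_t combine_cr0(uint64_t written, uint64_t current,
                                uint64_t guest_owned_bits)
    {
            return (written & guest_owned_bits) |
                   (current & ~guest_owned_bits);
    }

    int main(void)
    {
            uint64_t guest_owned = 0x8;        /* pretend only CR0.TS is guest-owned */
            uint64_t current_cr0 = 0x80000011; /* PG | ET | PE, TS clear */

            /* The write sets TS and clears PG; only the TS change is honored. */
            uint64_t effective = combine_cr0(0x19, current_cr0, guest_owned);
            printf("effective cr0 = 0x%llx\n", (unsigned long long)effective);
            return 0;
    }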
4241 | |||
4242 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | ||
4243 | { | ||
4244 | if (is_guest_mode(vcpu)) { | ||
4245 | if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | | ||
4246 | (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) | ||
4247 | return 1; | ||
4248 | vmcs_writel(CR4_READ_SHADOW, val); | ||
4249 | return 0; | ||
4250 | } else | ||
4251 | return kvm_set_cr4(vcpu, val); | ||
4252 | } | ||
4253 | |||
4254 | /* called to set cr0 as appropriate for a clts instruction exit. */ | ||
4255 | static void handle_clts(struct kvm_vcpu *vcpu) | ||
4256 | { | ||
4257 | if (is_guest_mode(vcpu)) { | ||
4258 | /* | ||
4259 | * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS | ||
4260 | * but we did (!fpu_active). We need to keep GUEST_CR0.TS on, | ||
4261 | * but pretend it's off (also in arch.cr0 for fpu_activate). | ||
4262 | */ | ||
4263 | vmcs_writel(CR0_READ_SHADOW, | ||
4264 | vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS); | ||
4265 | vcpu->arch.cr0 &= ~X86_CR0_TS; | ||
4266 | } else | ||
4267 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | ||
4268 | } | ||
4269 | |||
3359 | static int handle_cr(struct kvm_vcpu *vcpu) | 4270 | static int handle_cr(struct kvm_vcpu *vcpu) |
3360 | { | 4271 | { |
3361 | unsigned long exit_qualification, val; | 4272 | unsigned long exit_qualification, val; |
@@ -3372,7 +4283,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3372 | trace_kvm_cr_write(cr, val); | 4283 | trace_kvm_cr_write(cr, val); |
3373 | switch (cr) { | 4284 | switch (cr) { |
3374 | case 0: | 4285 | case 0: |
3375 | err = kvm_set_cr0(vcpu, val); | 4286 | err = handle_set_cr0(vcpu, val); |
3376 | kvm_complete_insn_gp(vcpu, err); | 4287 | kvm_complete_insn_gp(vcpu, err); |
3377 | return 1; | 4288 | return 1; |
3378 | case 3: | 4289 | case 3: |
@@ -3380,7 +4291,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3380 | kvm_complete_insn_gp(vcpu, err); | 4291 | kvm_complete_insn_gp(vcpu, err); |
3381 | return 1; | 4292 | return 1; |
3382 | case 4: | 4293 | case 4: |
3383 | err = kvm_set_cr4(vcpu, val); | 4294 | err = handle_set_cr4(vcpu, val); |
3384 | kvm_complete_insn_gp(vcpu, err); | 4295 | kvm_complete_insn_gp(vcpu, err); |
3385 | return 1; | 4296 | return 1; |
3386 | case 8: { | 4297 | case 8: { |
@@ -3398,7 +4309,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3398 | }; | 4309 | }; |
3399 | break; | 4310 | break; |
3400 | case 2: /* clts */ | 4311 | case 2: /* clts */ |
3401 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 4312 | handle_clts(vcpu); |
3402 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); | 4313 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
3403 | skip_emulated_instruction(vcpu); | 4314 | skip_emulated_instruction(vcpu); |
3404 | vmx_fpu_activate(vcpu); | 4315 | vmx_fpu_activate(vcpu); |
@@ -3574,12 +4485,6 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) | |||
3574 | return 1; | 4485 | return 1; |
3575 | } | 4486 | } |
3576 | 4487 | ||
3577 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) | ||
3578 | { | ||
3579 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3580 | return 1; | ||
3581 | } | ||
3582 | |||
3583 | static int handle_invd(struct kvm_vcpu *vcpu) | 4488 | static int handle_invd(struct kvm_vcpu *vcpu) |
3584 | { | 4489 | { |
3585 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4490 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
@@ -3777,11 +4682,19 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
3777 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | 4682 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
3778 | { | 4683 | { |
3779 | u64 sptes[4]; | 4684 | u64 sptes[4]; |
3780 | int nr_sptes, i; | 4685 | int nr_sptes, i, ret; |
3781 | gpa_t gpa; | 4686 | gpa_t gpa; |
3782 | 4687 | ||
3783 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 4688 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
3784 | 4689 | ||
4690 | ret = handle_mmio_page_fault_common(vcpu, gpa, true); | ||
4691 | if (likely(ret == 1)) | ||
4692 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | ||
4693 | EMULATE_DONE; | ||
4694 | if (unlikely(!ret)) | ||
4695 | return 1; | ||
4696 | |||
4697 | /* It is the real ept misconfig */ | ||
3785 | printk(KERN_ERR "EPT: Misconfiguration.\n"); | 4698 | printk(KERN_ERR "EPT: Misconfiguration.\n"); |
3786 | printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa); | 4699 | printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa); |
3787 | 4700 | ||
@@ -3866,6 +4779,639 @@ static int handle_invalid_op(struct kvm_vcpu *vcpu) | |||
3866 | } | 4779 | } |
3867 | 4780 | ||
3868 | /* | 4781 | /* |
4782 | * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. | ||
4783 | * We could reuse a single VMCS for all the L2 guests, but we also want the | ||
4784 | * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this | ||
4785 | * allows keeping them loaded on the processor, and in the future will allow | ||
4786 | * optimizations where prepare_vmcs02 doesn't need to set all the fields on | ||
4787 | * every entry if they never change. | ||
4788 | * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE | ||
4789 | * (>=0) with a vmcs02 for each recently loaded vmcs12, most recent first. | ||
4790 | * | ||
4791 | * The following functions allocate and free a vmcs02 in this pool. | ||
4792 | */ | ||
4793 | |||
4794 | /* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ | ||
4795 | static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | ||
4796 | { | ||
4797 | struct vmcs02_list *item; | ||
4798 | list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
4799 | if (item->vmptr == vmx->nested.current_vmptr) { | ||
4800 | list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
4801 | return &item->vmcs02; | ||
4802 | } | ||
4803 | |||
4804 | if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { | ||
4805 | /* Recycle the least recently used VMCS. */ | ||
4806 | item = list_entry(vmx->nested.vmcs02_pool.prev, | ||
4807 | struct vmcs02_list, list); | ||
4808 | item->vmptr = vmx->nested.current_vmptr; | ||
4809 | list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
4810 | return &item->vmcs02; | ||
4811 | } | ||
4812 | |||
4813 | /* Create a new VMCS */ | ||
4814 | item = (struct vmcs02_list *) | ||
4815 | kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
4816 | if (!item) | ||
4817 | return NULL; | ||
4818 | item->vmcs02.vmcs = alloc_vmcs(); | ||
4819 | if (!item->vmcs02.vmcs) { | ||
4820 | kfree(item); | ||
4821 | return NULL; | ||
4822 | } | ||
4823 | loaded_vmcs_init(&item->vmcs02); | ||
4824 | item->vmptr = vmx->nested.current_vmptr; | ||
4825 | list_add(&(item->list), &(vmx->nested.vmcs02_pool)); | ||
4826 | vmx->nested.vmcs02_num++; | ||
4827 | return &item->vmcs02; | ||
4828 | } | ||
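The lookup above amounts to a small most-recently-used cache: a hit moves the entry to the head of vmcs02_pool, and once VMCS02_POOL_SIZE entries exist the least recently used one is recycled for the new vmptr. The toy program below reproduces only that replacement policy over a plain array; POOL_SIZE and the key values are stand-ins, and nothing in it is kernel code:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define POOL_SIZE 3                       /* stand-in for VMCS02_POOL_SIZE */

    static uint64_t pool[POOL_SIZE];          /* pool[0] is most recently used */
    static int pool_num;

    /* Make 'key' the most recently used entry: move a hit to the front,
     * recycle the least recently used slot when the pool is full, or grow
     * the pool otherwise - the policy of nested_get_current_vmcs02(). */
    static void pool_get(uint64_t key)
    {
            int i;

            for (i = 0; i < pool_num; i++) {
                    if (pool[i] == key) {             /* hit: move to front */
                            memmove(&pool[1], &pool[0], i * sizeof(pool[0]));
                            pool[0] = key;
                            return;
                    }
            }
            if (pool_num == POOL_SIZE) {              /* full: recycle the LRU slot */
                    memmove(&pool[1], &pool[0],
                            (POOL_SIZE - 1) * sizeof(pool[0]));
                    pool[0] = key;
                    return;
            }
            memmove(&pool[1], &pool[0], pool_num * sizeof(pool[0]));
            pool[0] = key;
            pool_num++;
    }

    int main(void)
    {
            uint64_t keys[] = { 0x1000, 0x2000, 0x3000, 0x1000, 0x4000 };
            int i;

            for (i = 0; i < 5; i++) {
                    pool_get(keys[i]);
                    printf("front: %#llx, entries: %d\n",
                           (unsigned long long)pool[0], pool_num);
            }
            return 0;
    }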
4829 | |||
4830 | /* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ | ||
4831 | static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
4832 | { | ||
4833 | struct vmcs02_list *item; | ||
4834 | list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
4835 | if (item->vmptr == vmptr) { | ||
4836 | free_loaded_vmcs(&item->vmcs02); | ||
4837 | list_del(&item->list); | ||
4838 | kfree(item); | ||
4839 | vmx->nested.vmcs02_num--; | ||
4840 | return; | ||
4841 | } | ||
4842 | } | ||
4843 | |||
4844 | /* | ||
4845 | * Free all VMCSs saved for this vcpu, except the one pointed by | ||
4846 | * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one | ||
4847 | * currently used, if running L2), and vmcs01 when running L2. | ||
4848 | */ | ||
4849 | static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | ||
4850 | { | ||
4851 | struct vmcs02_list *item, *n; | ||
4852 | list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { | ||
4853 | if (vmx->loaded_vmcs != &item->vmcs02) | ||
4854 | free_loaded_vmcs(&item->vmcs02); | ||
4855 | list_del(&item->list); | ||
4856 | kfree(item); | ||
4857 | } | ||
4858 | vmx->nested.vmcs02_num = 0; | ||
4859 | |||
4860 | if (vmx->loaded_vmcs != &vmx->vmcs01) | ||
4861 | free_loaded_vmcs(&vmx->vmcs01); | ||
4862 | } | ||
4863 | |||
4864 | /* | ||
4865 | * Emulate the VMXON instruction. | ||
4866 | * Currently, we just remember that VMX is active, and do not save or even | ||
4867 | * inspect the argument to VMXON (the so-called "VMXON pointer") because we | ||
4868 | * do not currently need to store anything in that guest-allocated memory | ||
4869 | * region. Consequently, VMCLEAR and VMPTRLD also do not verify that their | ||
4870 | * argument is different from the VMXON pointer (which the spec says they do). | ||
4871 | */ | ||
4872 | static int handle_vmon(struct kvm_vcpu *vcpu) | ||
4873 | { | ||
4874 | struct kvm_segment cs; | ||
4875 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4876 | |||
4877 | /* The Intel VMX Instruction Reference lists a bunch of bits that | ||
4878 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | ||
4879 | * set to 1 (see vmx_set_cr4() for when we allow the guest to set this). | ||
4880 | * Otherwise, we should fail with #UD. We test these now: | ||
4881 | */ | ||
4882 | if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) || | ||
4883 | !kvm_read_cr0_bits(vcpu, X86_CR0_PE) || | ||
4884 | (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { | ||
4885 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
4886 | return 1; | ||
4887 | } | ||
4888 | |||
4889 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
4890 | if (is_long_mode(vcpu) && !cs.l) { | ||
4891 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
4892 | return 1; | ||
4893 | } | ||
4894 | |||
4895 | if (vmx_get_cpl(vcpu)) { | ||
4896 | kvm_inject_gp(vcpu, 0); | ||
4897 | return 1; | ||
4898 | } | ||
4899 | |||
4900 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
4901 | vmx->nested.vmcs02_num = 0; | ||
4902 | |||
4903 | vmx->nested.vmxon = true; | ||
4904 | |||
4905 | skip_emulated_instruction(vcpu); | ||
4906 | return 1; | ||
4907 | } | ||
4908 | |||
4909 | /* | ||
4910 | * Intel's VMX Instruction Reference specifies a common set of prerequisites | ||
4911 | * for running VMX instructions (except VMXON, whose prerequisites are | ||
4912 | * slightly different). It also specifies what exception to inject otherwise. | ||
4913 | */ | ||
4914 | static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | ||
4915 | { | ||
4916 | struct kvm_segment cs; | ||
4917 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4918 | |||
4919 | if (!vmx->nested.vmxon) { | ||
4920 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
4921 | return 0; | ||
4922 | } | ||
4923 | |||
4924 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
4925 | if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) || | ||
4926 | (is_long_mode(vcpu) && !cs.l)) { | ||
4927 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
4928 | return 0; | ||
4929 | } | ||
4930 | |||
4931 | if (vmx_get_cpl(vcpu)) { | ||
4932 | kvm_inject_gp(vcpu, 0); | ||
4933 | return 0; | ||
4934 | } | ||
4935 | |||
4936 | return 1; | ||
4937 | } | ||
4938 | |||
4939 | /* | ||
4940 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or | ||
4941 | * just stops using VMX. | ||
4942 | */ | ||
4943 | static void free_nested(struct vcpu_vmx *vmx) | ||
4944 | { | ||
4945 | if (!vmx->nested.vmxon) | ||
4946 | return; | ||
4947 | vmx->nested.vmxon = false; | ||
4948 | if (vmx->nested.current_vmptr != -1ull) { | ||
4949 | kunmap(vmx->nested.current_vmcs12_page); | ||
4950 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
4951 | vmx->nested.current_vmptr = -1ull; | ||
4952 | vmx->nested.current_vmcs12 = NULL; | ||
4953 | } | ||
4954 | /* Unpin physical memory we referred to in current vmcs02 */ | ||
4955 | if (vmx->nested.apic_access_page) { | ||
4956 | nested_release_page(vmx->nested.apic_access_page); | ||
4957 | vmx->nested.apic_access_page = NULL; | ||
4958 | } | ||
4959 | |||
4960 | nested_free_all_saved_vmcss(vmx); | ||
4961 | } | ||
4962 | |||
4963 | /* Emulate the VMXOFF instruction */ | ||
4964 | static int handle_vmoff(struct kvm_vcpu *vcpu) | ||
4965 | { | ||
4966 | if (!nested_vmx_check_permission(vcpu)) | ||
4967 | return 1; | ||
4968 | free_nested(to_vmx(vcpu)); | ||
4969 | skip_emulated_instruction(vcpu); | ||
4970 | return 1; | ||
4971 | } | ||
4972 | |||
4973 | /* | ||
4974 | * Decode the memory-address operand of a vmx instruction, as recorded on an | ||
4975 | * exit caused by such an instruction (run by a guest hypervisor). | ||
4976 | * On success, returns 0. When the operand is invalid, returns 1 and throws | ||
4977 | * #UD or #GP. | ||
4978 | */ | ||
4979 | static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | ||
4980 | unsigned long exit_qualification, | ||
4981 | u32 vmx_instruction_info, gva_t *ret) | ||
4982 | { | ||
4983 | /* | ||
4984 | * According to Vol. 3B, "Information for VM Exits Due to Instruction | ||
4985 | * Execution", on an exit, vmx_instruction_info holds most of the | ||
4986 | * addressing components of the operand. Only the displacement part | ||
4987 | * is put in exit_qualification (see 3B, "Basic VM-Exit Information"). | ||
4988 | * For how an actual address is calculated from all these components, | ||
4989 | * refer to Vol. 1, "Operand Addressing". | ||
4990 | */ | ||
4991 | int scaling = vmx_instruction_info & 3; | ||
4992 | int addr_size = (vmx_instruction_info >> 7) & 7; | ||
4993 | bool is_reg = vmx_instruction_info & (1u << 10); | ||
4994 | int seg_reg = (vmx_instruction_info >> 15) & 7; | ||
4995 | int index_reg = (vmx_instruction_info >> 18) & 0xf; | ||
4996 | bool index_is_valid = !(vmx_instruction_info & (1u << 22)); | ||
4997 | int base_reg = (vmx_instruction_info >> 23) & 0xf; | ||
4998 | bool base_is_valid = !(vmx_instruction_info & (1u << 27)); | ||
4999 | |||
5000 | if (is_reg) { | ||
5001 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
5002 | return 1; | ||
5003 | } | ||
5004 | |||
5005 | /* Addr = segment_base + offset */ | ||
5006 | /* offset = base + [index * scale] + displacement */ | ||
5007 | *ret = vmx_get_segment_base(vcpu, seg_reg); | ||
5008 | if (base_is_valid) | ||
5009 | *ret += kvm_register_read(vcpu, base_reg); | ||
5010 | if (index_is_valid) | ||
5011 | *ret += kvm_register_read(vcpu, index_reg)<<scaling; | ||
5012 | *ret += exit_qualification; /* holds the displacement */ | ||
5013 | |||
5014 | if (addr_size == 1) /* 32 bit */ | ||
5015 | *ret &= 0xffffffff; | ||
5016 | |||
5017 | /* | ||
5018 | * TODO: throw #GP (and return 1) in various cases that the VM* | ||
5019 | * instructions require it - e.g., offset beyond segment limit, | ||
5020 | * unusable or unreadable/unwritable segment, non-canonical 64-bit | ||
5021 | * address, and so on. Currently these are not checked. | ||
5022 | */ | ||
5023 | return 0; | ||
5024 | } | ||
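The shifts in get_vmx_mem_address() follow the VMX-instruction-information field layout referenced in the comment (SDM Vol. 3B). For readers who want to see the decoding on its own, here is a user-space sketch; the sample value passed to it is arbitrary and only the bit positions mirror the code above:

    #include <stdint.h>
    #include <stdio.h>

    /* Pull apart VMX_INSTRUCTION_INFO the same way get_vmx_mem_address() does. */
    static void decode_vmx_instruction_info(uint32_t info)
    {
            int scaling     = info & 3;              /* index is scaled by 2^scaling */
            int addr_size   = (info >> 7) & 7;       /* 0 = 16, 1 = 32, 2 = 64 bit */
            int is_reg      = (info >> 10) & 1;      /* operand is a register, not memory */
            int seg_reg     = (info >> 15) & 7;      /* segment register number */
            int index_reg   = (info >> 18) & 0xf;
            int index_valid = !((info >> 22) & 1);   /* bit set means "no index" */
            int base_reg    = (info >> 23) & 0xf;
            int base_valid  = !((info >> 27) & 1);   /* bit set means "no base" */

            printf("scaling=%d addr_size=%d is_reg=%d seg=%d\n",
                   scaling, addr_size, is_reg, seg_reg);
            printf("index: reg=%d valid=%d  base: reg=%d valid=%d\n",
                   index_reg, index_valid, base_reg, base_valid);
    }

    int main(void)
    {
            decode_vmx_instruction_info(0x00402d08); /* arbitrary example value */
            return 0;
    }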
5025 | |||
5026 | /* | ||
5027 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
5028 | * set the success or error code of an emulated VMX instruction, as specified | ||
5029 | * by Vol 2B, VMX Instruction Reference, "Conventions". | ||
5030 | */ | ||
5031 | static void nested_vmx_succeed(struct kvm_vcpu *vcpu) | ||
5032 | { | ||
5033 | vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) | ||
5034 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5035 | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); | ||
5036 | } | ||
5037 | |||
5038 | static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) | ||
5039 | { | ||
5040 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5041 | & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | | ||
5042 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5043 | | X86_EFLAGS_CF); | ||
5044 | } | ||
5045 | |||
5046 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5047 | u32 vm_instruction_error) | ||
5048 | { | ||
5049 | if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { | ||
5050 | /* | ||
5051 | * failValid writes the error number to the current VMCS, which | ||
5052 | * can't be done if there isn't a current VMCS. | ||
5053 | */ | ||
5054 | nested_vmx_failInvalid(vcpu); | ||
5055 | return; | ||
5056 | } | ||
5057 | vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) | ||
5058 | & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | | ||
5059 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | ||
5060 | | X86_EFLAGS_ZF); | ||
5061 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | ||
5062 | } | ||
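These three helpers emulate the RFLAGS conventions that a guest hypervisor checks after each VMX instruction: CF set means VMfailInvalid, ZF set means VMfailValid with an error code in the current VMCS, and both clear mean VMsucceed. A small sketch of the check as seen from the guest side (illustrative only, with the two flag constants spelled out):

    #include <stdint.h>
    #include <stdio.h>

    #define X86_EFLAGS_CF 0x0001ULL
    #define X86_EFLAGS_ZF 0x0040ULL

    /* Interpret RFLAGS after an emulated VMX instruction, per the
     * "Conventions" section mirrored by nested_vmx_succeed()/fail*(). */
    static const char *vmx_result(uint64_t rflags)
    {
            if (rflags & X86_EFLAGS_CF)
                    return "VMfailInvalid";
            if (rflags & X86_EFLAGS_ZF)
                    return "VMfailValid (read VM_INSTRUCTION_ERROR)";
            return "VMsucceed";
    }

    int main(void)
    {
            printf("%s\n", vmx_result(0x2));                  /* only reserved bit 1 set */
            printf("%s\n", vmx_result(0x2 | X86_EFLAGS_CF));
            printf("%s\n", vmx_result(0x2 | X86_EFLAGS_ZF));
            return 0;
    }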
5063 | |||
5064 | /* Emulate the VMCLEAR instruction */ | ||
5065 | static int handle_vmclear(struct kvm_vcpu *vcpu) | ||
5066 | { | ||
5067 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5068 | gva_t gva; | ||
5069 | gpa_t vmptr; | ||
5070 | struct vmcs12 *vmcs12; | ||
5071 | struct page *page; | ||
5072 | struct x86_exception e; | ||
5073 | |||
5074 | if (!nested_vmx_check_permission(vcpu)) | ||
5075 | return 1; | ||
5076 | |||
5077 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
5078 | vmcs_read32(VMX_INSTRUCTION_INFO), &gva)) | ||
5079 | return 1; | ||
5080 | |||
5081 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, | ||
5082 | sizeof(vmptr), &e)) { | ||
5083 | kvm_inject_page_fault(vcpu, &e); | ||
5084 | return 1; | ||
5085 | } | ||
5086 | |||
5087 | if (!IS_ALIGNED(vmptr, PAGE_SIZE)) { | ||
5088 | nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); | ||
5089 | skip_emulated_instruction(vcpu); | ||
5090 | return 1; | ||
5091 | } | ||
5092 | |||
5093 | if (vmptr == vmx->nested.current_vmptr) { | ||
5094 | kunmap(vmx->nested.current_vmcs12_page); | ||
5095 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5096 | vmx->nested.current_vmptr = -1ull; | ||
5097 | vmx->nested.current_vmcs12 = NULL; | ||
5098 | } | ||
5099 | |||
5100 | page = nested_get_page(vcpu, vmptr); | ||
5101 | if (page == NULL) { | ||
5102 | /* | ||
5103 | * For accurate processor emulation, VMCLEAR beyond available | ||
5104 | * physical memory should do nothing at all. However, it is | ||
5105 | * possible that a nested vmx bug, not a guest hypervisor bug, | ||
5106 | * resulted in this case, so let's shut down before doing any | ||
5107 | * more damage: | ||
5108 | */ | ||
5109 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | ||
5110 | return 1; | ||
5111 | } | ||
5112 | vmcs12 = kmap(page); | ||
5113 | vmcs12->launch_state = 0; | ||
5114 | kunmap(page); | ||
5115 | nested_release_page(page); | ||
5116 | |||
5117 | nested_free_vmcs02(vmx, vmptr); | ||
5118 | |||
5119 | skip_emulated_instruction(vcpu); | ||
5120 | nested_vmx_succeed(vcpu); | ||
5121 | return 1; | ||
5122 | } | ||
5123 | |||
5124 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); | ||
5125 | |||
5126 | /* Emulate the VMLAUNCH instruction */ | ||
5127 | static int handle_vmlaunch(struct kvm_vcpu *vcpu) | ||
5128 | { | ||
5129 | return nested_vmx_run(vcpu, true); | ||
5130 | } | ||
5131 | |||
5132 | /* Emulate the VMRESUME instruction */ | ||
5133 | static int handle_vmresume(struct kvm_vcpu *vcpu) | ||
5134 | { | ||
5135 | |||
5136 | return nested_vmx_run(vcpu, false); | ||
5137 | } | ||
5138 | |||
5139 | enum vmcs_field_type { | ||
5140 | VMCS_FIELD_TYPE_U16 = 0, | ||
5141 | VMCS_FIELD_TYPE_U64 = 1, | ||
5142 | VMCS_FIELD_TYPE_U32 = 2, | ||
5143 | VMCS_FIELD_TYPE_NATURAL_WIDTH = 3 | ||
5144 | }; | ||
5145 | |||
5146 | static inline int vmcs_field_type(unsigned long field) | ||
5147 | { | ||
5148 | if (0x1 & field) /* the *_HIGH fields are all 32 bit */ | ||
5149 | return VMCS_FIELD_TYPE_U32; | ||
5150 | return (field >> 13) & 0x3; | ||
5151 | } | ||
5152 | |||
5153 | static inline int vmcs_field_readonly(unsigned long field) | ||
5154 | { | ||
5155 | return (((field >> 10) & 0x3) == 1); | ||
5156 | } | ||
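The two helpers above rely on the VMCS field-encoding layout: bit 0 selects the high half of a 64-bit field, bits 11:10 give the field type (1 marks the read-only exit-information fields), and bits 14:13 give the width. A standalone sketch of the same decoding, using a few field encodings from the architecture for illustration:

    #include <stdio.h>

    static const char *width_name[] = { "u16", "u64", "u32", "natural width" };

    /* Decode a VMCS field encoding the same way vmcs_field_type() and
     * vmcs_field_readonly() do above. */
    static void decode_field(unsigned long field)
    {
            int high     = field & 1;                /* *_HIGH half of a 64-bit field */
            int readonly = ((field >> 10) & 0x3) == 1;
            int width    = high ? 2 : (int)((field >> 13) & 0x3);

            printf("0x%04lx: %s%s%s\n", field, width_name[width],
                   readonly ? ", read-only" : "",
                   high ? " (high half)" : "");
    }

    int main(void)
    {
            decode_field(0x0000);   /* VIRTUAL_PROCESSOR_ID: u16 */
            decode_field(0x2800);   /* VMCS_LINK_POINTER: u64 */
            decode_field(0x2801);   /* VMCS_LINK_POINTER_HIGH */
            decode_field(0x4402);   /* VM_EXIT_REASON: u32, read-only */
            return 0;
    }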
5157 | |||
5158 | /* | ||
5159 | * Read a vmcs12 field. Since these can have varying lengths and we return | ||
5160 | * one type, we chose the biggest type (u64) and zero-extend the return value | ||
5161 | * to that size. Note that the caller, handle_vmread, might need to use only | ||
5162 | * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of | ||
5163 | * 64-bit fields are to be returned). | ||
5164 | */ | ||
5165 | static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, | ||
5166 | unsigned long field, u64 *ret) | ||
5167 | { | ||
5168 | short offset = vmcs_field_to_offset(field); | ||
5169 | char *p; | ||
5170 | |||
5171 | if (offset < 0) | ||
5172 | return 0; | ||
5173 | |||
5174 | p = ((char *)(get_vmcs12(vcpu))) + offset; | ||
5175 | |||
5176 | switch (vmcs_field_type(field)) { | ||
5177 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5178 | *ret = *((natural_width *)p); | ||
5179 | return 1; | ||
5180 | case VMCS_FIELD_TYPE_U16: | ||
5181 | *ret = *((u16 *)p); | ||
5182 | return 1; | ||
5183 | case VMCS_FIELD_TYPE_U32: | ||
5184 | *ret = *((u32 *)p); | ||
5185 | return 1; | ||
5186 | case VMCS_FIELD_TYPE_U64: | ||
5187 | *ret = *((u64 *)p); | ||
5188 | return 1; | ||
5189 | default: | ||
5190 | return 0; /* can never happen. */ | ||
5191 | } | ||
5192 | } | ||
5193 | |||
5194 | /* | ||
5195 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was | ||
5196 | * used before) all generate the same failure when it is missing. | ||
5197 | */ | ||
5198 | static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu) | ||
5199 | { | ||
5200 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5201 | if (vmx->nested.current_vmptr == -1ull) { | ||
5202 | nested_vmx_failInvalid(vcpu); | ||
5203 | skip_emulated_instruction(vcpu); | ||
5204 | return 0; | ||
5205 | } | ||
5206 | return 1; | ||
5207 | } | ||
5208 | |||
5209 | static int handle_vmread(struct kvm_vcpu *vcpu) | ||
5210 | { | ||
5211 | unsigned long field; | ||
5212 | u64 field_value; | ||
5213 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
5214 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
5215 | gva_t gva = 0; | ||
5216 | |||
5217 | if (!nested_vmx_check_permission(vcpu) || | ||
5218 | !nested_vmx_check_vmcs12(vcpu)) | ||
5219 | return 1; | ||
5220 | |||
5221 | /* Decode instruction info and find the field to read */ | ||
5222 | field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); | ||
5223 | /* Read the field, zero-extended to a u64 field_value */ | ||
5224 | if (!vmcs12_read_any(vcpu, field, &field_value)) { | ||
5225 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
5226 | skip_emulated_instruction(vcpu); | ||
5227 | return 1; | ||
5228 | } | ||
5229 | /* | ||
5230 | * Now copy part of this value to register or memory, as requested. | ||
5231 | * Note that the number of bits actually copied is 32 or 64 depending | ||
5232 | * on the guest's mode (32 or 64 bit), not on the given field's length. | ||
5233 | */ | ||
5234 | if (vmx_instruction_info & (1u << 10)) { | ||
5235 | kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), | ||
5236 | field_value); | ||
5237 | } else { | ||
5238 | if (get_vmx_mem_address(vcpu, exit_qualification, | ||
5239 | vmx_instruction_info, &gva)) | ||
5240 | return 1; | ||
5241 | /* _system ok, as nested_vmx_check_permission verified cpl=0 */ | ||
5242 | kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, | ||
5243 | &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL); | ||
5244 | } | ||
5245 | |||
5246 | nested_vmx_succeed(vcpu); | ||
5247 | skip_emulated_instruction(vcpu); | ||
5248 | return 1; | ||
5249 | } | ||
5250 | |||
5251 | |||
5252 | static int handle_vmwrite(struct kvm_vcpu *vcpu) | ||
5253 | { | ||
5254 | unsigned long field; | ||
5255 | gva_t gva; | ||
5256 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
5257 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
5258 | char *p; | ||
5259 | short offset; | ||
5260 | /* The value to write might be 32 or 64 bits, depending on L1's long | ||
5261 | * mode, and eventually we need to write that into a field of several | ||
5262 | * possible lengths. The code below first zero-extends the value to 64 | ||
5263 | * bit (field_value), and then copies only the appropriate number of | ||
5264 | * bits into the vmcs12 field. | ||
5265 | */ | ||
5266 | u64 field_value = 0; | ||
5267 | struct x86_exception e; | ||
5268 | |||
5269 | if (!nested_vmx_check_permission(vcpu) || | ||
5270 | !nested_vmx_check_vmcs12(vcpu)) | ||
5271 | return 1; | ||
5272 | |||
5273 | if (vmx_instruction_info & (1u << 10)) | ||
5274 | field_value = kvm_register_read(vcpu, | ||
5275 | (((vmx_instruction_info) >> 3) & 0xf)); | ||
5276 | else { | ||
5277 | if (get_vmx_mem_address(vcpu, exit_qualification, | ||
5278 | vmx_instruction_info, &gva)) | ||
5279 | return 1; | ||
5280 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, | ||
5281 | &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { | ||
5282 | kvm_inject_page_fault(vcpu, &e); | ||
5283 | return 1; | ||
5284 | } | ||
5285 | } | ||
5286 | |||
5287 | |||
5288 | field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); | ||
5289 | if (vmcs_field_readonly(field)) { | ||
5290 | nested_vmx_failValid(vcpu, | ||
5291 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); | ||
5292 | skip_emulated_instruction(vcpu); | ||
5293 | return 1; | ||
5294 | } | ||
5295 | |||
5296 | offset = vmcs_field_to_offset(field); | ||
5297 | if (offset < 0) { | ||
5298 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
5299 | skip_emulated_instruction(vcpu); | ||
5300 | return 1; | ||
5301 | } | ||
5302 | p = ((char *) get_vmcs12(vcpu)) + offset; | ||
5303 | |||
5304 | switch (vmcs_field_type(field)) { | ||
5305 | case VMCS_FIELD_TYPE_U16: | ||
5306 | *(u16 *)p = field_value; | ||
5307 | break; | ||
5308 | case VMCS_FIELD_TYPE_U32: | ||
5309 | *(u32 *)p = field_value; | ||
5310 | break; | ||
5311 | case VMCS_FIELD_TYPE_U64: | ||
5312 | *(u64 *)p = field_value; | ||
5313 | break; | ||
5314 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5315 | *(natural_width *)p = field_value; | ||
5316 | break; | ||
5317 | default: | ||
5318 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
5319 | skip_emulated_instruction(vcpu); | ||
5320 | return 1; | ||
5321 | } | ||
5322 | |||
5323 | nested_vmx_succeed(vcpu); | ||
5324 | skip_emulated_instruction(vcpu); | ||
5325 | return 1; | ||
5326 | } | ||
5327 | |||
5328 | /* Emulate the VMPTRLD instruction */ | ||
5329 | static int handle_vmptrld(struct kvm_vcpu *vcpu) | ||
5330 | { | ||
5331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5332 | gva_t gva; | ||
5333 | gpa_t vmptr; | ||
5334 | struct x86_exception e; | ||
5335 | |||
5336 | if (!nested_vmx_check_permission(vcpu)) | ||
5337 | return 1; | ||
5338 | |||
5339 | if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), | ||
5340 | vmcs_read32(VMX_INSTRUCTION_INFO), &gva)) | ||
5341 | return 1; | ||
5342 | |||
5343 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, | ||
5344 | sizeof(vmptr), &e)) { | ||
5345 | kvm_inject_page_fault(vcpu, &e); | ||
5346 | return 1; | ||
5347 | } | ||
5348 | |||
5349 | if (!IS_ALIGNED(vmptr, PAGE_SIZE)) { | ||
5350 | nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); | ||
5351 | skip_emulated_instruction(vcpu); | ||
5352 | return 1; | ||
5353 | } | ||
5354 | |||
5355 | if (vmx->nested.current_vmptr != vmptr) { | ||
5356 | struct vmcs12 *new_vmcs12; | ||
5357 | struct page *page; | ||
5358 | page = nested_get_page(vcpu, vmptr); | ||
5359 | if (page == NULL) { | ||
5360 | nested_vmx_failInvalid(vcpu); | ||
5361 | skip_emulated_instruction(vcpu); | ||
5362 | return 1; | ||
5363 | } | ||
5364 | new_vmcs12 = kmap(page); | ||
5365 | if (new_vmcs12->revision_id != VMCS12_REVISION) { | ||
5366 | kunmap(page); | ||
5367 | nested_release_page_clean(page); | ||
5368 | nested_vmx_failValid(vcpu, | ||
5369 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); | ||
5370 | skip_emulated_instruction(vcpu); | ||
5371 | return 1; | ||
5372 | } | ||
5373 | if (vmx->nested.current_vmptr != -1ull) { | ||
5374 | kunmap(vmx->nested.current_vmcs12_page); | ||
5375 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5376 | } | ||
5377 | |||
5378 | vmx->nested.current_vmptr = vmptr; | ||
5379 | vmx->nested.current_vmcs12 = new_vmcs12; | ||
5380 | vmx->nested.current_vmcs12_page = page; | ||
5381 | } | ||
5382 | |||
5383 | nested_vmx_succeed(vcpu); | ||
5384 | skip_emulated_instruction(vcpu); | ||
5385 | return 1; | ||
5386 | } | ||
5387 | |||
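The revision_id check above mirrors what hardware requires of VMPTRLD: software must write the VMCS revision identifier (bits 30:0 of IA32_VMX_BASIC) into the first dword of a VMCS region before loading it. A rough sketch of how an L1 hypervisor would stamp a fresh region; the helper name is made up, and under nested VMX the value L1 reads from the MSR is of course the one KVM emulates (VMCS12_REVISION):

static void stamp_vmcs_revision(void *vmcs_region)      /* hypothetical */
{
        u64 basic;

        rdmsrl(MSR_IA32_VMX_BASIC, basic);
        /* bits 30:0 of IA32_VMX_BASIC hold the VMCS revision identifier */
        *(u32 *)vmcs_region = (u32)basic & 0x7fffffff;
}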
5388 | /* Emulate the VMPTRST instruction */ | ||
5389 | static int handle_vmptrst(struct kvm_vcpu *vcpu) | ||
5390 | { | ||
5391 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
5392 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
5393 | gva_t vmcs_gva; | ||
5394 | struct x86_exception e; | ||
5395 | |||
5396 | if (!nested_vmx_check_permission(vcpu)) | ||
5397 | return 1; | ||
5398 | |||
5399 | if (get_vmx_mem_address(vcpu, exit_qualification, | ||
5400 | vmx_instruction_info, &vmcs_gva)) | ||
5401 | return 1; | ||
5402 | /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ | ||
5403 | if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, | ||
5404 | (void *)&to_vmx(vcpu)->nested.current_vmptr, | ||
5405 | sizeof(u64), &e)) { | ||
5406 | kvm_inject_page_fault(vcpu, &e); | ||
5407 | return 1; | ||
5408 | } | ||
5409 | nested_vmx_succeed(vcpu); | ||
5410 | skip_emulated_instruction(vcpu); | ||
5411 | return 1; | ||
5412 | } | ||
5413 | |||
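For context, the instruction emulated here is trivial from L1's side: VMPTRST just stores the current-VMCS pointer (the value this patch tracks in nested.current_vmptr) into a 64-bit memory operand, roughly:

static u64 l1_read_current_vmptr(void)          /* hypothetical L1-side code */
{
        u64 vmptr;

        asm volatile("vmptrst %0" : "=m" (vmptr) : : "cc", "memory");
        return vmptr;
}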
5414 | /* | ||
3869 | * The exit handlers return 1 if the exit was handled fully and guest execution | 5415 | * The exit handlers return 1 if the exit was handled fully and guest execution |
3870 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 5416 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
3871 | * to be done to userspace and return 0. | 5417 | * to be done to userspace and return 0. |
@@ -3886,15 +5432,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3886 | [EXIT_REASON_INVD] = handle_invd, | 5432 | [EXIT_REASON_INVD] = handle_invd, |
3887 | [EXIT_REASON_INVLPG] = handle_invlpg, | 5433 | [EXIT_REASON_INVLPG] = handle_invlpg, |
3888 | [EXIT_REASON_VMCALL] = handle_vmcall, | 5434 | [EXIT_REASON_VMCALL] = handle_vmcall, |
3889 | [EXIT_REASON_VMCLEAR] = handle_vmx_insn, | 5435 | [EXIT_REASON_VMCLEAR] = handle_vmclear, |
3890 | [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, | 5436 | [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, |
3891 | [EXIT_REASON_VMPTRLD] = handle_vmx_insn, | 5437 | [EXIT_REASON_VMPTRLD] = handle_vmptrld, |
3892 | [EXIT_REASON_VMPTRST] = handle_vmx_insn, | 5438 | [EXIT_REASON_VMPTRST] = handle_vmptrst, |
3893 | [EXIT_REASON_VMREAD] = handle_vmx_insn, | 5439 | [EXIT_REASON_VMREAD] = handle_vmread, |
3894 | [EXIT_REASON_VMRESUME] = handle_vmx_insn, | 5440 | [EXIT_REASON_VMRESUME] = handle_vmresume, |
3895 | [EXIT_REASON_VMWRITE] = handle_vmx_insn, | 5441 | [EXIT_REASON_VMWRITE] = handle_vmwrite, |
3896 | [EXIT_REASON_VMOFF] = handle_vmx_insn, | 5442 | [EXIT_REASON_VMOFF] = handle_vmoff, |
3897 | [EXIT_REASON_VMON] = handle_vmx_insn, | 5443 | [EXIT_REASON_VMON] = handle_vmon, |
3898 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 5444 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
3899 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 5445 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3900 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 5446 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
@@ -3911,6 +5457,229 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3911 | static const int kvm_vmx_max_exit_handlers = | 5457 | static const int kvm_vmx_max_exit_handlers = |
3912 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 5458 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
3913 | 5459 | ||
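All of the new handlers added to this table follow the return convention documented above: 1 means the exit was consumed in the kernel and the guest can be resumed, 0 means vcpu->run has been filled in for userspace. A hypothetical handler, just to make the convention concrete (handle_example_exit and its predicate are not part of the patch):

static int handle_example_exit(struct kvm_vcpu *vcpu)
{
        if (exit_fully_handled_in_kernel(vcpu)) {       /* hypothetical test */
                skip_emulated_instruction(vcpu);
                return 1;               /* done, re-enter the guest */
        }
        /* otherwise describe the work for userspace and stop the vcpu loop */
        vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;     /* as handle_triple_fault() does */
        return 0;
}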
5460 | /* | ||
5461 | * Return 1 if we should exit from L2 to L1 to handle an MSR access, | ||
5462 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed | ||
5463 | * disinterest in the current event (read or write a specific MSR) by using an | ||
5464 | * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. | ||
5465 | */ | ||
5466 | static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | ||
5467 | struct vmcs12 *vmcs12, u32 exit_reason) | ||
5468 | { | ||
5469 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; | ||
5470 | gpa_t bitmap; | ||
5471 | |||
5472 | if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) | ||
5473 | return 1; | ||
5474 | |||
5475 | /* | ||
5476 | * The MSR_BITMAP page is divided into four 1024-byte bitmaps, | ||
5477 | * for the four combinations of read/write and low/high MSR numbers. | ||
5478 | * First we need to figure out which of the four to use: | ||
5479 | */ | ||
5480 | bitmap = vmcs12->msr_bitmap; | ||
5481 | if (exit_reason == EXIT_REASON_MSR_WRITE) | ||
5482 | bitmap += 2048; | ||
5483 | if (msr_index >= 0xc0000000) { | ||
5484 | msr_index -= 0xc0000000; | ||
5485 | bitmap += 1024; | ||
5486 | } | ||
5487 | |||
5488 | /* Then read the msr_index'th bit from this bitmap: */ | ||
5489 | if (msr_index < 1024*8) { | ||
5490 | unsigned char b; | ||
5491 | kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); | ||
5492 | return 1 & (b >> (msr_index & 7)); | ||
5493 | } else | ||
5494 | return 1; /* let L1 handle the wrong parameter */ | ||
5495 | } | ||
5496 | |||
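The bitmap arithmetic above follows the layout of the 4KB MSR-bitmap page: bytes 0-1023 cover reads of MSRs 0x00000000-0x00001fff, bytes 1024-2047 cover reads of 0xc0000000-0xc0001fff, and the second 2KB repeats that split for writes, one bit per MSR. The same offset computation, pulled out as a standalone sketch (not part of the patch):

/* Byte offset of 'msr' in a VMX MSR bitmap; the bit within that byte
 * is (msr & 7). Mirrors the logic in nested_vmx_exit_handled_msr().
 */
static unsigned long msr_bitmap_byte(u32 msr, bool write)
{
        unsigned long offset = write ? 2048 : 0;        /* second 2KB: writes */

        if (msr >= 0xc0000000) {                        /* high MSR range */
                msr -= 0xc0000000;
                offset += 1024;
        }
        return offset + msr / 8;
}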
5497 | /* | ||
5498 | * Return 1 if we should exit from L2 to L1 to handle a CR access exit, | ||
5499 | * rather than handle it ourselves in L0. I.e., check if L1 wanted to | ||
5500 | * intercept (via guest_host_mask etc.) the current event. | ||
5501 | */ | ||
5502 | static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | ||
5503 | struct vmcs12 *vmcs12) | ||
5504 | { | ||
5505 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
5506 | int cr = exit_qualification & 15; | ||
5507 | int reg = (exit_qualification >> 8) & 15; | ||
5508 | unsigned long val = kvm_register_read(vcpu, reg); | ||
5509 | |||
5510 | switch ((exit_qualification >> 4) & 3) { | ||
5511 | case 0: /* mov to cr */ | ||
5512 | switch (cr) { | ||
5513 | case 0: | ||
5514 | if (vmcs12->cr0_guest_host_mask & | ||
5515 | (val ^ vmcs12->cr0_read_shadow)) | ||
5516 | return 1; | ||
5517 | break; | ||
5518 | case 3: | ||
5519 | if ((vmcs12->cr3_target_count >= 1 && | ||
5520 | vmcs12->cr3_target_value0 == val) || | ||
5521 | (vmcs12->cr3_target_count >= 2 && | ||
5522 | vmcs12->cr3_target_value1 == val) || | ||
5523 | (vmcs12->cr3_target_count >= 3 && | ||
5524 | vmcs12->cr3_target_value2 == val) || | ||
5525 | (vmcs12->cr3_target_count >= 4 && | ||
5526 | vmcs12->cr3_target_value3 == val)) | ||
5527 | return 0; | ||
5528 | if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) | ||
5529 | return 1; | ||
5530 | break; | ||
5531 | case 4: | ||
5532 | if (vmcs12->cr4_guest_host_mask & | ||
5533 | (vmcs12->cr4_read_shadow ^ val)) | ||
5534 | return 1; | ||
5535 | break; | ||
5536 | case 8: | ||
5537 | if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) | ||
5538 | return 1; | ||
5539 | break; | ||
5540 | } | ||
5541 | break; | ||
5542 | case 2: /* clts */ | ||
5543 | if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && | ||
5544 | (vmcs12->cr0_read_shadow & X86_CR0_TS)) | ||
5545 | return 1; | ||
5546 | break; | ||
5547 | case 1: /* mov from cr */ | ||
5548 | switch (cr) { | ||
5549 | case 3: | ||
5550 | if (vmcs12->cpu_based_vm_exec_control & | ||
5551 | CPU_BASED_CR3_STORE_EXITING) | ||
5552 | return 1; | ||
5553 | break; | ||
5554 | case 8: | ||
5555 | if (vmcs12->cpu_based_vm_exec_control & | ||
5556 | CPU_BASED_CR8_STORE_EXITING) | ||
5557 | return 1; | ||
5558 | break; | ||
5559 | } | ||
5560 | break; | ||
5561 | case 3: /* lmsw */ | ||
5562 | /* | ||
5563 | * lmsw can change bits 1..3 of cr0, and can only set (not clear) | ||
5564 | * bit 0 of cr0. Other attempted changes are ignored, with no exit. | ||
5565 | */ | ||
5566 | if (vmcs12->cr0_guest_host_mask & 0xe & | ||
5567 | (val ^ vmcs12->cr0_read_shadow)) | ||
5568 | return 1; | ||
5569 | if ((vmcs12->cr0_guest_host_mask & 0x1) && | ||
5570 | !(vmcs12->cr0_read_shadow & 0x1) && | ||
5571 | (val & 0x1)) | ||
5572 | return 1; | ||
5573 | break; | ||
5574 | } | ||
5575 | return 0; | ||
5576 | } | ||
5577 | |||
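The CR0/CR4 cases above all reduce to one guest/host-mask rule: a bit set in crX_guest_host_mask is owned by L1, so a write that would change such a bit relative to crX_read_shadow has to be reflected to L1. That predicate on its own, as a sketch (the helper name is invented here):

/* True if a write of 'val' to a masked CR must be reflected to L1:
 * some bit L1 owns (mask) differs from what L1 exposed to its guest
 * (shadow).
 */
static inline bool nested_cr_write_intercepted(unsigned long val,
                                               unsigned long mask,
                                               unsigned long shadow)
{
        return (mask & (val ^ shadow)) != 0;
}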
5578 | /* | ||
5579 | * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we | ||
5580 | * should handle it ourselves in L0 (and then continue L2). Only call this | ||
5581 | * when in is_guest_mode (L2). | ||
5582 | */ | ||
5583 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | ||
5584 | { | ||
5585 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
5586 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
5587 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5588 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
5589 | |||
5590 | if (vmx->nested.nested_run_pending) | ||
5591 | return 0; | ||
5592 | |||
5593 | if (unlikely(vmx->fail)) { | ||
5594 | printk(KERN_INFO "%s failed vm entry %x\n", | ||
5595 | __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); | ||
5596 | return 1; | ||
5597 | } | ||
5598 | |||
5599 | switch (exit_reason) { | ||
5600 | case EXIT_REASON_EXCEPTION_NMI: | ||
5601 | if (!is_exception(intr_info)) | ||
5602 | return 0; | ||
5603 | else if (is_page_fault(intr_info)) | ||
5604 | return enable_ept; | ||
5605 | return vmcs12->exception_bitmap & | ||
5606 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); | ||
5607 | case EXIT_REASON_EXTERNAL_INTERRUPT: | ||
5608 | return 0; | ||
5609 | case EXIT_REASON_TRIPLE_FAULT: | ||
5610 | return 1; | ||
5611 | case EXIT_REASON_PENDING_INTERRUPT: | ||
5612 | case EXIT_REASON_NMI_WINDOW: | ||
5613 | /* | ||
5614 | * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit | ||
5615 | * (aka Interrupt Window Exiting) only when L1 turned it on, | ||
5616 | * so if we got a PENDING_INTERRUPT exit, this must be for L1. | ||
5617 | * Same for NMI Window Exiting. | ||
5618 | */ | ||
5619 | return 1; | ||
5620 | case EXIT_REASON_TASK_SWITCH: | ||
5621 | return 1; | ||
5622 | case EXIT_REASON_CPUID: | ||
5623 | return 1; | ||
5624 | case EXIT_REASON_HLT: | ||
5625 | return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); | ||
5626 | case EXIT_REASON_INVD: | ||
5627 | return 1; | ||
5628 | case EXIT_REASON_INVLPG: | ||
5629 | return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); | ||
5630 | case EXIT_REASON_RDPMC: | ||
5631 | return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); | ||
5632 | case EXIT_REASON_RDTSC: | ||
5633 | return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); | ||
5634 | case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: | ||
5635 | case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: | ||
5636 | case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: | ||
5637 | case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: | ||
5638 | case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: | ||
5639 | /* | ||
5640 | * VMX instructions trap unconditionally. This allows L1 to | ||
5641 | * emulate them for its L2 guest, i.e., allows 3-level nesting! | ||
5642 | */ | ||
5643 | return 1; | ||
5644 | case EXIT_REASON_CR_ACCESS: | ||
5645 | return nested_vmx_exit_handled_cr(vcpu, vmcs12); | ||
5646 | case EXIT_REASON_DR_ACCESS: | ||
5647 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); | ||
5648 | case EXIT_REASON_IO_INSTRUCTION: | ||
5649 | /* TODO: support IO bitmaps */ | ||
5650 | return 1; | ||
5651 | case EXIT_REASON_MSR_READ: | ||
5652 | case EXIT_REASON_MSR_WRITE: | ||
5653 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); | ||
5654 | case EXIT_REASON_INVALID_STATE: | ||
5655 | return 1; | ||
5656 | case EXIT_REASON_MWAIT_INSTRUCTION: | ||
5657 | return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); | ||
5658 | case EXIT_REASON_MONITOR_INSTRUCTION: | ||
5659 | return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); | ||
5660 | case EXIT_REASON_PAUSE_INSTRUCTION: | ||
5661 | return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) || | ||
5662 | nested_cpu_has2(vmcs12, | ||
5663 | SECONDARY_EXEC_PAUSE_LOOP_EXITING); | ||
5664 | case EXIT_REASON_MCE_DURING_VMENTRY: | ||
5665 | return 0; | ||
5666 | case EXIT_REASON_TPR_BELOW_THRESHOLD: | ||
5667 | return 1; | ||
5668 | case EXIT_REASON_APIC_ACCESS: | ||
5669 | return nested_cpu_has2(vmcs12, | ||
5670 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
5671 | case EXIT_REASON_EPT_VIOLATION: | ||
5672 | case EXIT_REASON_EPT_MISCONFIG: | ||
5673 | return 0; | ||
5674 | case EXIT_REASON_WBINVD: | ||
5675 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | ||
5676 | case EXIT_REASON_XSETBV: | ||
5677 | return 1; | ||
5678 | default: | ||
5679 | return 1; | ||
5680 | } | ||
5681 | } | ||
5682 | |||
3914 | static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | 5683 | static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) |
3915 | { | 5684 | { |
3916 | *info1 = vmcs_readl(EXIT_QUALIFICATION); | 5685 | *info1 = vmcs_readl(EXIT_QUALIFICATION); |
@@ -3933,6 +5702,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3933 | if (vmx->emulation_required && emulate_invalid_guest_state) | 5702 | if (vmx->emulation_required && emulate_invalid_guest_state) |
3934 | return handle_invalid_guest_state(vcpu); | 5703 | return handle_invalid_guest_state(vcpu); |
3935 | 5704 | ||
5705 | /* | ||
5706 | * the KVM_REQ_EVENT optimization bit is only on for one entry, and if | ||
5707 | * we did not inject a still-pending event to L1 now because of | ||
5708 | * nested_run_pending, we need to re-enable this bit. | ||
5709 | */ | ||
5710 | if (vmx->nested.nested_run_pending) | ||
5711 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5712 | |||
5713 | if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH || | ||
5714 | exit_reason == EXIT_REASON_VMRESUME)) | ||
5715 | vmx->nested.nested_run_pending = 1; | ||
5716 | else | ||
5717 | vmx->nested.nested_run_pending = 0; | ||
5718 | |||
5719 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { | ||
5720 | nested_vmx_vmexit(vcpu); | ||
5721 | return 1; | ||
5722 | } | ||
5723 | |||
3936 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | 5724 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { |
3937 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 5725 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3938 | vcpu->run->fail_entry.hardware_entry_failure_reason | 5726 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -3955,7 +5743,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3955 | "(0x%x) and exit reason is 0x%x\n", | 5743 | "(0x%x) and exit reason is 0x%x\n", |
3956 | __func__, vectoring_info, exit_reason); | 5744 | __func__, vectoring_info, exit_reason); |
3957 | 5745 | ||
3958 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { | 5746 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && |
5747 | !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( | ||
5748 | get_vmcs12(vcpu), vcpu)))) { | ||
3959 | if (vmx_interrupt_allowed(vcpu)) { | 5749 | if (vmx_interrupt_allowed(vcpu)) { |
3960 | vmx->soft_vnmi_blocked = 0; | 5750 | vmx->soft_vnmi_blocked = 0; |
3961 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 5751 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
@@ -4118,6 +5908,8 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
4118 | 5908 | ||
4119 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 5909 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
4120 | { | 5910 | { |
5911 | if (is_guest_mode(&vmx->vcpu)) | ||
5912 | return; | ||
4121 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, | 5913 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, |
4122 | VM_EXIT_INSTRUCTION_LEN, | 5914 | VM_EXIT_INSTRUCTION_LEN, |
4123 | IDT_VECTORING_ERROR_CODE); | 5915 | IDT_VECTORING_ERROR_CODE); |
@@ -4125,6 +5917,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
4125 | 5917 | ||
4126 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | 5918 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) |
4127 | { | 5919 | { |
5920 | if (is_guest_mode(vcpu)) | ||
5921 | return; | ||
4128 | __vmx_complete_interrupts(to_vmx(vcpu), | 5922 | __vmx_complete_interrupts(to_vmx(vcpu), |
4129 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), | 5923 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), |
4130 | VM_ENTRY_INSTRUCTION_LEN, | 5924 | VM_ENTRY_INSTRUCTION_LEN, |
@@ -4145,6 +5939,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4145 | { | 5939 | { |
4146 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5940 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4147 | 5941 | ||
5942 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { | ||
5943 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
5944 | if (vmcs12->idt_vectoring_info_field & | ||
5945 | VECTORING_INFO_VALID_MASK) { | ||
5946 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
5947 | vmcs12->idt_vectoring_info_field); | ||
5948 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
5949 | vmcs12->vm_exit_instruction_len); | ||
5950 | if (vmcs12->idt_vectoring_info_field & | ||
5951 | VECTORING_INFO_DELIVER_CODE_MASK) | ||
5952 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
5953 | vmcs12->idt_vectoring_error_code); | ||
5954 | } | ||
5955 | } | ||
5956 | |||
4148 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 5957 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
4149 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 5958 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
4150 | vmx->entry_time = ktime_get(); | 5959 | vmx->entry_time = ktime_get(); |
@@ -4167,6 +5976,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4167 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5976 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
4168 | vmx_set_interrupt_shadow(vcpu, 0); | 5977 | vmx_set_interrupt_shadow(vcpu, 0); |
4169 | 5978 | ||
5979 | vmx->__launched = vmx->loaded_vmcs->launched; | ||
4170 | asm( | 5980 | asm( |
4171 | /* Store host registers */ | 5981 | /* Store host registers */ |
4172 | "push %%"R"dx; push %%"R"bp;" | 5982 | "push %%"R"dx; push %%"R"bp;" |
@@ -4237,7 +6047,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4237 | "pop %%"R"bp; pop %%"R"dx \n\t" | 6047 | "pop %%"R"bp; pop %%"R"dx \n\t" |
4238 | "setbe %c[fail](%0) \n\t" | 6048 | "setbe %c[fail](%0) \n\t" |
4239 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 6049 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
4240 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 6050 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), |
4241 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 6051 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
4242 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | 6052 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), |
4243 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), | 6053 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), |
@@ -4276,8 +6086,19 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4276 | 6086 | ||
4277 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 6087 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
4278 | 6088 | ||
6089 | if (is_guest_mode(vcpu)) { | ||
6090 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6091 | vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; | ||
6092 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | ||
6093 | vmcs12->idt_vectoring_error_code = | ||
6094 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
6095 | vmcs12->vm_exit_instruction_len = | ||
6096 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
6097 | } | ||
6098 | } | ||
6099 | |||
4279 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 6100 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
4280 | vmx->launched = 1; | 6101 | vmx->loaded_vmcs->launched = 1; |
4281 | 6102 | ||
4282 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 6103 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
4283 | 6104 | ||
@@ -4289,41 +6110,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4289 | #undef R | 6110 | #undef R |
4290 | #undef Q | 6111 | #undef Q |
4291 | 6112 | ||
4292 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | ||
4293 | { | ||
4294 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4295 | |||
4296 | if (vmx->vmcs) { | ||
4297 | vcpu_clear(vmx); | ||
4298 | free_vmcs(vmx->vmcs); | ||
4299 | vmx->vmcs = NULL; | ||
4300 | } | ||
4301 | } | ||
4302 | |||
4303 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | 6113 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) |
4304 | { | 6114 | { |
4305 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6115 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4306 | 6116 | ||
4307 | free_vpid(vmx); | 6117 | free_vpid(vmx); |
4308 | vmx_free_vmcs(vcpu); | 6118 | free_nested(vmx); |
6119 | free_loaded_vmcs(vmx->loaded_vmcs); | ||
4309 | kfree(vmx->guest_msrs); | 6120 | kfree(vmx->guest_msrs); |
4310 | kvm_vcpu_uninit(vcpu); | 6121 | kvm_vcpu_uninit(vcpu); |
4311 | kmem_cache_free(kvm_vcpu_cache, vmx); | 6122 | kmem_cache_free(kvm_vcpu_cache, vmx); |
4312 | } | 6123 | } |
4313 | 6124 | ||
4314 | static inline void vmcs_init(struct vmcs *vmcs) | ||
4315 | { | ||
4316 | u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); | ||
4317 | |||
4318 | if (!vmm_exclusive) | ||
4319 | kvm_cpu_vmxon(phys_addr); | ||
4320 | |||
4321 | vmcs_clear(vmcs); | ||
4322 | |||
4323 | if (!vmm_exclusive) | ||
4324 | kvm_cpu_vmxoff(); | ||
4325 | } | ||
4326 | |||
4327 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 6125 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
4328 | { | 6126 | { |
4329 | int err; | 6127 | int err; |
@@ -4345,11 +6143,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4345 | goto uninit_vcpu; | 6143 | goto uninit_vcpu; |
4346 | } | 6144 | } |
4347 | 6145 | ||
4348 | vmx->vmcs = alloc_vmcs(); | 6146 | vmx->loaded_vmcs = &vmx->vmcs01; |
4349 | if (!vmx->vmcs) | 6147 | vmx->loaded_vmcs->vmcs = alloc_vmcs(); |
6148 | if (!vmx->loaded_vmcs->vmcs) | ||
4350 | goto free_msrs; | 6149 | goto free_msrs; |
4351 | 6150 | if (!vmm_exclusive) | |
4352 | vmcs_init(vmx->vmcs); | 6151 | kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); |
6152 | loaded_vmcs_init(vmx->loaded_vmcs); | ||
6153 | if (!vmm_exclusive) | ||
6154 | kvm_cpu_vmxoff(); | ||
4353 | 6155 | ||
4354 | cpu = get_cpu(); | 6156 | cpu = get_cpu(); |
4355 | vmx_vcpu_load(&vmx->vcpu, cpu); | 6157 | vmx_vcpu_load(&vmx->vcpu, cpu); |
@@ -4375,10 +6177,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4375 | goto free_vmcs; | 6177 | goto free_vmcs; |
4376 | } | 6178 | } |
4377 | 6179 | ||
6180 | vmx->nested.current_vmptr = -1ull; | ||
6181 | vmx->nested.current_vmcs12 = NULL; | ||
6182 | |||
4378 | return &vmx->vcpu; | 6183 | return &vmx->vcpu; |
4379 | 6184 | ||
4380 | free_vmcs: | 6185 | free_vmcs: |
4381 | free_vmcs(vmx->vmcs); | 6186 | free_vmcs(vmx->loaded_vmcs->vmcs); |
4382 | free_msrs: | 6187 | free_msrs: |
4383 | kfree(vmx->guest_msrs); | 6188 | kfree(vmx->guest_msrs); |
4384 | uninit_vcpu: | 6189 | uninit_vcpu: |
@@ -4512,6 +6317,650 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
4512 | 6317 | ||
4513 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 6318 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
4514 | { | 6319 | { |
6320 | if (func == 1 && nested) | ||
6321 | entry->ecx |= bit(X86_FEATURE_VMX); | ||
6322 | } | ||
6323 | |||
6324 | /* | ||
6325 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | ||
6326 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | ||
6327 | * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 | ||
6328 | * guest in a way that will both be appropriate to L1's requests, and our | ||
6329 | * needs. In addition to modifying the active vmcs (which is vmcs02), this | ||
6330 | * function also has additional necessary side-effects, like setting various | ||
6331 | * vcpu->arch fields. | ||
6332 | */ | ||
6333 | static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
6334 | { | ||
6335 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6336 | u32 exec_control; | ||
6337 | |||
6338 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); | ||
6339 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); | ||
6340 | vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); | ||
6341 | vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); | ||
6342 | vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); | ||
6343 | vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); | ||
6344 | vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); | ||
6345 | vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); | ||
6346 | vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); | ||
6347 | vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); | ||
6348 | vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); | ||
6349 | vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); | ||
6350 | vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); | ||
6351 | vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); | ||
6352 | vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); | ||
6353 | vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); | ||
6354 | vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); | ||
6355 | vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); | ||
6356 | vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); | ||
6357 | vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); | ||
6358 | vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); | ||
6359 | vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); | ||
6360 | vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); | ||
6361 | vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); | ||
6362 | vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); | ||
6363 | vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); | ||
6364 | vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); | ||
6365 | vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); | ||
6366 | vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); | ||
6367 | vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); | ||
6368 | vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); | ||
6369 | vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); | ||
6370 | vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); | ||
6371 | vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); | ||
6372 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); | ||
6373 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); | ||
6374 | |||
6375 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); | ||
6376 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
6377 | vmcs12->vm_entry_intr_info_field); | ||
6378 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
6379 | vmcs12->vm_entry_exception_error_code); | ||
6380 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
6381 | vmcs12->vm_entry_instruction_len); | ||
6382 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | ||
6383 | vmcs12->guest_interruptibility_info); | ||
6384 | vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); | ||
6385 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | ||
6386 | vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); | ||
6387 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | ||
6388 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | ||
6389 | vmcs12->guest_pending_dbg_exceptions); | ||
6390 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); | ||
6391 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); | ||
6392 | |||
6393 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | ||
6394 | |||
6395 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | ||
6396 | (vmcs_config.pin_based_exec_ctrl | | ||
6397 | vmcs12->pin_based_vm_exec_control)); | ||
6398 | |||
6399 | /* | ||
6400 | * Whether page-faults are trapped is determined by a combination of | ||
6401 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. | ||
6402 | * If enable_ept, L0 doesn't care about page faults and we should | ||
6403 | * set all of these to L1's desires. However, if !enable_ept, L0 does | ||
6404 | * care about (at least some) page faults, and because it is not easy | ||
6405 | * (if at all possible?) to merge L0 and L1's desires, we simply ask | ||
6406 | * to exit on each and every L2 page fault. This is done by setting | ||
6407 | * MASK=MATCH=0 and (see below) EB.PF=1. | ||
6408 | * Note that below we don't need special code to set EB.PF beyond the | ||
6409 | * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, | ||
6410 | * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when | ||
6411 | * !enable_ept, EB.PF is 1, so the "or" will always be 1. | ||
6412 | * | ||
6413 | * A problem with this approach (when !enable_ept) is that L1 may be | ||
6414 | * injected with more page faults than it asked for. This could have | ||
6415 | * caused problems, but in practice existing hypervisors don't care. | ||
6416 | * To fix this, we will need to emulate the PFEC checking (on the L1 | ||
6417 | * page tables), using walk_addr(), when injecting PFs to L1. | ||
6418 | */ | ||
6419 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, | ||
6420 | enable_ept ? vmcs12->page_fault_error_code_mask : 0); | ||
6421 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, | ||
6422 | enable_ept ? vmcs12->page_fault_error_code_match : 0); | ||
6423 | |||
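        /*
         * For reference, per the SDM's #PF treatment: a page fault causes a
         * VM exit iff EB.PF == ((PFEC & PFEC_MASK) == PFEC_MATCH). With
         * MASK = MATCH = 0 the comparison is always true, so EB.PF alone
         * decides, which is exactly what the !enable_ept case above relies on.
         */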
6424 | if (cpu_has_secondary_exec_ctrls()) { | ||
6425 | u32 exec_control = vmx_secondary_exec_control(vmx); | ||
6426 | if (!vmx->rdtscp_enabled) | ||
6427 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
6428 | /* Take the following fields only from vmcs12 */ | ||
6429 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6430 | if (nested_cpu_has(vmcs12, | ||
6431 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | ||
6432 | exec_control |= vmcs12->secondary_vm_exec_control; | ||
6433 | |||
6434 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { | ||
6435 | /* | ||
6436 | * Translate L1 physical address to host physical | ||
6437 | * address for vmcs02. Keep the page pinned, so this | ||
6438 | * physical address remains valid. We keep a reference | ||
6439 | * to it so we can release it later. | ||
6440 | */ | ||
6441 | if (vmx->nested.apic_access_page) /* shouldn't happen */ | ||
6442 | nested_release_page(vmx->nested.apic_access_page); | ||
6443 | vmx->nested.apic_access_page = | ||
6444 | nested_get_page(vcpu, vmcs12->apic_access_addr); | ||
6445 | /* | ||
6446 | * If translation failed, no matter: This feature asks | ||
6447 | * to exit when accessing the given address, and if it | ||
6448 | * can never be accessed, this feature won't do | ||
6449 | * anything anyway. | ||
6450 | */ | ||
6451 | if (!vmx->nested.apic_access_page) | ||
6452 | exec_control &= | ||
6453 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6454 | else | ||
6455 | vmcs_write64(APIC_ACCESS_ADDR, | ||
6456 | page_to_phys(vmx->nested.apic_access_page)); | ||
6457 | } | ||
6458 | |||
6459 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
6460 | } | ||
6461 | |||
6462 | |||
6463 | /* | ||
6464 | * Set host-state according to L0's settings (vmcs12 is irrelevant here) | ||
6465 | * Some constant fields are set here by vmx_set_constant_host_state(). | ||
6466 | * Other fields are different per CPU, and will be set later when | ||
6467 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. | ||
6468 | */ | ||
6469 | vmx_set_constant_host_state(); | ||
6470 | |||
6471 | /* | ||
6472 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | ||
6473 | * entry, but only if the current (host) sp changed from the value | ||
6474 | * we wrote last (vmx->host_rsp). This cache is no longer relevant | ||
6475 | * if we switch vmcs, and rather than hold a separate cache per vmcs, | ||
6476 | * here we just force the write to happen on entry. | ||
6477 | */ | ||
6478 | vmx->host_rsp = 0; | ||
6479 | |||
6480 | exec_control = vmx_exec_control(vmx); /* L0's desires */ | ||
6481 | exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
6482 | exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | ||
6483 | exec_control &= ~CPU_BASED_TPR_SHADOW; | ||
6484 | exec_control |= vmcs12->cpu_based_vm_exec_control; | ||
6485 | /* | ||
6486 | * Merging of IO and MSR bitmaps not currently supported. | ||
6487 | * Rather, exit every time. | ||
6488 | */ | ||
6489 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
6490 | exec_control &= ~CPU_BASED_USE_IO_BITMAPS; | ||
6491 | exec_control |= CPU_BASED_UNCOND_IO_EXITING; | ||
6492 | |||
6493 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | ||
6494 | |||
6495 | /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the | ||
6496 | * bitwise-or of what L1 wants to trap for L2, and what we want to | ||
6497 | * trap. Note that CR0.TS also needs updating - we do this later. | ||
6498 | */ | ||
6499 | update_exception_bitmap(vcpu); | ||
6500 | vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; | ||
6501 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
6502 | |||
6503 | /* Note: IA32E_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */ | ||
6504 | vmcs_write32(VM_EXIT_CONTROLS, | ||
6505 | vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl); | ||
6506 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls | | ||
6507 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | ||
6508 | |||
6509 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) | ||
6510 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | ||
6511 | else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | ||
6512 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | ||
6513 | |||
6514 | |||
6515 | set_cr4_guest_host_mask(vmx); | ||
6516 | |||
6517 | vmcs_write64(TSC_OFFSET, | ||
6518 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | ||
6519 | |||
6520 | if (enable_vpid) { | ||
6521 | /* | ||
6522 | * Trivially support vpid by letting L2s share their parent | ||
6523 | * L1's vpid. TODO: move to a more elaborate solution, giving | ||
6524 | * each L2 its own vpid and exposing the vpid feature to L1. | ||
6525 | */ | ||
6526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | ||
6527 | vmx_flush_tlb(vcpu); | ||
6528 | } | ||
6529 | |||
6530 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | ||
6531 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | ||
6532 | if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | ||
6533 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | ||
6534 | else | ||
6535 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | ||
6536 | /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ | ||
6537 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
6538 | |||
6539 | /* | ||
6540 | * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified | ||
6541 | * TS bit (for lazy fpu) and bits which we consider mandatory enabled. | ||
6542 | * The CR0_READ_SHADOW is what L2 should have expected to read given | ||
6543 | * the specifications by L1; It's not enough to take | ||
6544 | * vmcs12->cr0_read_shadow because in our cr0_guest_host_mask we may | ||
6545 | * have more bits than L1 expected. | ||
6546 | */ | ||
6547 | vmx_set_cr0(vcpu, vmcs12->guest_cr0); | ||
6548 | vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); | ||
6549 | |||
6550 | vmx_set_cr4(vcpu, vmcs12->guest_cr4); | ||
6551 | vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); | ||
6552 | |||
6553 | /* shadow page tables on either EPT or shadow page tables */ | ||
6554 | kvm_set_cr3(vcpu, vmcs12->guest_cr3); | ||
6555 | kvm_mmu_reset_context(vcpu); | ||
6556 | |||
6557 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | ||
6558 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); | ||
6559 | } | ||
6560 | |||
6561 | /* | ||
6562 | * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 | ||
6563 | * for running an L2 nested guest. | ||
6564 | */ | ||
6565 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
6566 | { | ||
6567 | struct vmcs12 *vmcs12; | ||
6568 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6569 | int cpu; | ||
6570 | struct loaded_vmcs *vmcs02; | ||
6571 | |||
6572 | if (!nested_vmx_check_permission(vcpu) || | ||
6573 | !nested_vmx_check_vmcs12(vcpu)) | ||
6574 | return 1; | ||
6575 | |||
6576 | skip_emulated_instruction(vcpu); | ||
6577 | vmcs12 = get_vmcs12(vcpu); | ||
6578 | |||
6579 | /* | ||
6580 | * The nested entry process starts with enforcing various prerequisites | ||
6581 | * on vmcs12 as required by the Intel SDM, and acting appropriately when | ||
6582 | * they fail: As the SDM explains, some conditions should cause the | ||
6583 | * instruction to fail, while others will cause the instruction to seem | ||
6584 | * to succeed, but return an EXIT_REASON_INVALID_STATE. | ||
6585 | * To speed up the normal (success) code path, we should avoid checking | ||
6586 | * for misconfigurations which will anyway be caught by the processor | ||
6587 | * when using the merged vmcs02. | ||
6588 | */ | ||
6589 | if (vmcs12->launch_state == launch) { | ||
6590 | nested_vmx_failValid(vcpu, | ||
6591 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | ||
6592 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | ||
6593 | return 1; | ||
6594 | } | ||
6595 | |||
6596 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | ||
6597 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { | ||
6598 | /*TODO: Also verify bits beyond physical address width are 0*/ | ||
6599 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
6600 | return 1; | ||
6601 | } | ||
6602 | |||
6603 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && | ||
6604 | !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { | ||
6605 | /*TODO: Also verify bits beyond physical address width are 0*/ | ||
6606 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
6607 | return 1; | ||
6608 | } | ||
6609 | |||
6610 | if (vmcs12->vm_entry_msr_load_count > 0 || | ||
6611 | vmcs12->vm_exit_msr_load_count > 0 || | ||
6612 | vmcs12->vm_exit_msr_store_count > 0) { | ||
6613 | if (printk_ratelimit()) | ||
6614 | printk(KERN_WARNING | ||
6615 | "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__); | ||
6616 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
6617 | return 1; | ||
6618 | } | ||
6619 | |||
6620 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | ||
6621 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || | ||
6622 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, | ||
6623 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || | ||
6624 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, | ||
6625 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || | ||
6626 | !vmx_control_verify(vmcs12->vm_exit_controls, | ||
6627 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || | ||
6628 | !vmx_control_verify(vmcs12->vm_entry_controls, | ||
6629 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) | ||
6630 | { | ||
6631 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
6632 | return 1; | ||
6633 | } | ||
6634 | |||
6635 | if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || | ||
6636 | ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { | ||
6637 | nested_vmx_failValid(vcpu, | ||
6638 | VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); | ||
6639 | return 1; | ||
6640 | } | ||
6641 | |||
6642 | if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || | ||
6643 | ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { | ||
6644 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
6645 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
6646 | return 1; | ||
6647 | } | ||
6648 | if (vmcs12->vmcs_link_pointer != -1ull) { | ||
6649 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
6650 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); | ||
6651 | return 1; | ||
6652 | } | ||
6653 | |||
6654 | /* | ||
6655 | * We're finally done with prerequisite checking, and can start with | ||
6656 | * the nested entry. | ||
6657 | */ | ||
6658 | |||
6659 | vmcs02 = nested_get_current_vmcs02(vmx); | ||
6660 | if (!vmcs02) | ||
6661 | return -ENOMEM; | ||
6662 | |||
6663 | enter_guest_mode(vcpu); | ||
6664 | |||
6665 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | ||
6666 | |||
6667 | cpu = get_cpu(); | ||
6668 | vmx->loaded_vmcs = vmcs02; | ||
6669 | vmx_vcpu_put(vcpu); | ||
6670 | vmx_vcpu_load(vcpu, cpu); | ||
6671 | vcpu->cpu = cpu; | ||
6672 | put_cpu(); | ||
6673 | |||
6674 | vmcs12->launch_state = 1; | ||
6675 | |||
6676 | prepare_vmcs02(vcpu, vmcs12); | ||
6677 | |||
6678 | /* | ||
6679 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | ||
6680 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | ||
6681 | * returned as far as L1 is concerned. It will only return (and set | ||
6682 | * the success flag) when L2 exits (see nested_vmx_vmexit()). | ||
6683 | */ | ||
6684 | return 1; | ||
6685 | } | ||
6686 | |||
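The block of vmx_control_verify() checks above enforces the allowed-0/allowed-1 rule from the VMX capability MSRs: for each control field, the low word lists bits that must be 1 and the high word lists bits that may be 1, and a vmcs12 value is acceptable only if it satisfies both. A sketch of a check with that meaning (the patch's own helper, defined earlier in the file, is assumed to be equivalent):

/* 'low' = bits that must be 1, 'high' = bits that are allowed to be 1 */
static inline bool example_control_ok(u32 control, u32 low, u32 high)
{
        return (low & ~control) == 0 &&         /* every mandatory bit set */
               (control & ~high) == 0;          /* nothing outside allowed */
}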
6687 | /* | ||
6688 | * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date | ||
6689 | * because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK). | ||
6690 | * This function returns the new value we should put in vmcs12.guest_cr0. | ||
6691 | * It's not enough to just return the vmcs02 GUEST_CR0. Rather, | ||
6692 | * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now | ||
6693 | * available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0 | ||
6694 | * didn't trap the bit, because if L1 did, so would L0). | ||
6695 | * 2. Bits that L1 asked to trap (and therefore L0 also did) could not have | ||
6696 | * been modified by L2, and L1 knows it. So just leave the old value of | ||
6697 | * the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0 | ||
6698 | * isn't relevant, because if L0 traps this bit it can set it to anything. | ||
6699 | * 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have | ||
6700 | * changed these bits, and therefore they need to be updated, but L0 | ||
6701 | * didn't necessarily allow them to be changed in GUEST_CR0 - and rather | ||
6702 | * put them in vmcs02 CR0_READ_SHADOW. So take these bits from there. | ||
6703 | */ | ||
6704 | static inline unsigned long | ||
6705 | vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
6706 | { | ||
6707 | return | ||
6708 | /*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) | | ||
6709 | /*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) | | ||
6710 | /*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask | | ||
6711 | vcpu->arch.cr0_guest_owned_bits)); | ||
6712 | } | ||
6713 | |||
6714 | static inline unsigned long | ||
6715 | vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
6716 | { | ||
6717 | return | ||
6718 | /*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) | | ||
6719 | /*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) | | ||
6720 | /*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask | | ||
6721 | vcpu->arch.cr4_guest_owned_bits)); | ||
6722 | } | ||
6723 | |||
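As a concrete reading of the three cases above: suppose L0 lets the guest own only CR0.TS (cr0_guest_owned_bits == X86_CR0_TS) while L1's cr0_guest_host_mask covers CR0.PG and CR0.PE. Each bit of the merged guest_cr0 then comes from a different place, as this purely illustrative expansion shows:

static unsigned long example_vmcs12_guest_cr0(struct vmcs12 *vmcs12)
{
        /* case 1: owned by L2 (neither L0 nor L1 traps TS here) */
        unsigned long ts = vmcs_readl(GUEST_CR0) & X86_CR0_TS;
        /* case 2: trapped by L1, so L2 cannot have changed PG/PE */
        unsigned long pg_pe = vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE);
        /* case 3: trapped only by L0; what L2 sees is vmcs02's read shadow */
        unsigned long rest = vmcs_readl(CR0_READ_SHADOW) &
                             ~(X86_CR0_TS | X86_CR0_PG | X86_CR0_PE);

        return ts | pg_pe | rest;
}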
6724 | /* | ||
6725 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | ||
6726 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | ||
6727 | * and this function updates it to reflect the changes to the guest state while | ||
6728 | * L2 was running (and perhaps made some exits which were handled directly by L0 | ||
6729 | * without going back to L1), and to reflect the exit reason. | ||
6730 | * Note that we do not have to copy here all VMCS fields, just those that | ||
6731 | * could have changed by the L2 guest or the exit - i.e., the guest-state and | ||
6732 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | ||
6733 | * which already writes to vmcs12 directly. | ||
6734 | */ | ||
6735 | void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
6736 | { | ||
6737 | /* update guest state fields: */ | ||
6738 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | ||
6739 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | ||
6740 | |||
6741 | kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); | ||
6742 | vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
6743 | vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
6744 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); | ||
6745 | |||
6746 | vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); | ||
6747 | vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); | ||
6748 | vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); | ||
6749 | vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); | ||
6750 | vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); | ||
6751 | vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); | ||
6752 | vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); | ||
6753 | vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); | ||
6754 | vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT); | ||
6755 | vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT); | ||
6756 | vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT); | ||
6757 | vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT); | ||
6758 | vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT); | ||
6759 | vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT); | ||
6760 | vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT); | ||
6761 | vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT); | ||
6762 | vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); | ||
6763 | vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); | ||
6764 | vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); | ||
6765 | vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); | ||
6766 | vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); | ||
6767 | vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); | ||
6768 | vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); | ||
6769 | vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); | ||
6770 | vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES); | ||
6771 | vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES); | ||
6772 | vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE); | ||
6773 | vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE); | ||
6774 | vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE); | ||
6775 | vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE); | ||
6776 | vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE); | ||
6777 | vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE); | ||
6778 | vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); | ||
6779 | vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); | ||
6780 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); | ||
6781 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); | ||
6782 | |||
6783 | vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); | ||
6784 | vmcs12->guest_interruptibility_info = | ||
6785 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
6786 | vmcs12->guest_pending_dbg_exceptions = | ||
6787 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | ||
6788 | |||
6789 | /* TODO: These cannot have changed unless we have MSR bitmaps and | ||
6790 | * the relevant bit asks not to trap the change */ | ||
6791 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
6792 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) | ||
6793 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | ||
6794 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | ||
6795 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | ||
6796 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | ||
6797 | |||
6798 | /* update exit information fields: */ | ||
6799 | |||
6800 | vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
6801 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
6802 | |||
6803 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
6804 | vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | ||
6805 | vmcs12->idt_vectoring_info_field = | ||
6806 | vmcs_read32(IDT_VECTORING_INFO_FIELD); | ||
6807 | vmcs12->idt_vectoring_error_code = | ||
6808 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
6809 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
6810 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | ||
6811 | |||
6812 | /* clear vm-entry fields which are to be cleared on exit */ | ||
6813 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | ||
6814 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; | ||
6815 | } | ||
6816 | |||
6817 | /* | ||
6818 | * A part of what we need to do when the nested L2 guest exits and we want to | ||
6819 | * run its L1 parent, is to reset L1's guest state to the host state specified | ||
6820 | * in vmcs12. | ||
6821 | * This function is to be called not only on normal nested exit, but also on | ||
6822 | * a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry | ||
6823 | * Failures During or After Loading Guest State"). | ||
6824 | * This function should be called when the active VMCS is L1's (vmcs01). | ||
6825 | */ | ||
6826 | void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
6827 | { | ||
6828 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | ||
6829 | vcpu->arch.efer = vmcs12->host_ia32_efer; | ||
6830 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | ||
6831 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | ||
6832 | else | ||
6833 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | ||
6834 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
6835 | |||
6836 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); | ||
6837 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); | ||
6838 | /* | ||
6839 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | ||
6840 | * actually changed, because it depends on the current state of | ||
6841 | * fpu_active (which may have changed). | ||
6842 | * Note that vmx_set_cr0 refers to efer set above. | ||
6843 | */ | ||
6844 | kvm_set_cr0(vcpu, vmcs12->host_cr0); | ||
6845 | /* | ||
6846 | * If we did fpu_activate()/fpu_deactivate() during L2's run, we need | ||
6847 | * to apply the same changes to L1's vmcs. We just set cr0 correctly, | ||
6848 | * but we also need to update cr0_guest_host_mask and exception_bitmap. | ||
6849 | */ | ||
6850 | update_exception_bitmap(vcpu); | ||
6851 | vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0); | ||
6852 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
6853 | |||
6854 | /* | ||
6855 | * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01 | ||
6856 | * (KVM doesn't change it) - no reason to call set_cr4_guest_host_mask(); | ||
6857 | */ | ||
6858 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | ||
6859 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | ||
6860 | |||
6861 | /* shadow page tables on either EPT or shadow page tables */ | ||
6862 | kvm_set_cr3(vcpu, vmcs12->host_cr3); | ||
6863 | kvm_mmu_reset_context(vcpu); | ||
6864 | |||
6865 | if (enable_vpid) { | ||
6866 | /* | ||
6867 | * Trivially support vpid by letting L2s share their parent | ||
6868 | * L1's vpid. TODO: move to a more elaborate solution, giving | ||
6869 | * each L2 its own vpid and exposing the vpid feature to L1. | ||
6870 | */ | ||
6871 | vmx_flush_tlb(vcpu); | ||
6872 | } | ||
6873 | |||
6874 | |||
6875 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); | ||
6876 | vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); | ||
6877 | vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); | ||
6878 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); | ||
6879 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); | ||
6880 | vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base); | ||
6881 | vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base); | ||
6882 | vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base); | ||
6883 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector); | ||
6884 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector); | ||
6885 | vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector); | ||
6886 | vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector); | ||
6887 | vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector); | ||
6888 | vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector); | ||
6889 | vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector); | ||
6890 | |||
6891 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) | ||
6892 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); | ||
6893 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
6894 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | ||
6895 | vmcs12->host_ia32_perf_global_ctrl); | ||
6896 | } | ||
6897 | |||
6898 | /* | ||
6899 | * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 | ||
6900 | * and modify vmcs12 to make it see what it would expect to see there if | ||
6901 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) | ||
6902 | */ | ||
6903 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | ||
6904 | { | ||
6905 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6906 | int cpu; | ||
6907 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6908 | |||
6909 | leave_guest_mode(vcpu); | ||
6910 | prepare_vmcs12(vcpu, vmcs12); | ||
6911 | |||
6912 | cpu = get_cpu(); | ||
6913 | vmx->loaded_vmcs = &vmx->vmcs01; | ||
6914 | vmx_vcpu_put(vcpu); | ||
6915 | vmx_vcpu_load(vcpu, cpu); | ||
6916 | vcpu->cpu = cpu; | ||
6917 | put_cpu(); | ||
6918 | |||
6919 | /* if no vmcs02 cache requested, remove the one we used */ | ||
6920 | if (VMCS02_POOL_SIZE == 0) | ||
6921 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | ||
6922 | |||
6923 | load_vmcs12_host_state(vcpu, vmcs12); | ||
6924 | |||
6925 | /* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */ | ||
6926 | vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); | ||
6927 | |||
6928 | /* This is needed for same reason as it was needed in prepare_vmcs02 */ | ||
6929 | vmx->host_rsp = 0; | ||
6930 | |||
6931 | /* Unpin physical memory we referred to in vmcs02 */ | ||
6932 | if (vmx->nested.apic_access_page) { | ||
6933 | nested_release_page(vmx->nested.apic_access_page); | ||
6934 | vmx->nested.apic_access_page = 0; | ||
6935 | } | ||
6936 | |||
6937 | /* | ||
6938 | * Exiting from L2 to L1, we're now back to L1 which thinks it just | ||
6939 | * finished a VMLAUNCH or VMRESUME instruction, so we need to set the | ||
6940 | * success or failure flag accordingly. | ||
6941 | */ | ||
6942 | if (unlikely(vmx->fail)) { | ||
6943 | vmx->fail = 0; | ||
6944 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); | ||
6945 | } else | ||
6946 | nested_vmx_succeed(vcpu); | ||
6947 | } | ||
6948 | |||
6949 | /* | ||
6950 | * L1's failure to enter L2 is a subset of a normal exit, as explained in | ||
6951 | * 23.7 "VM-entry failures during or after loading guest state" (this also | ||
6952 | * lists the acceptable exit-reason and exit-qualification parameters). | ||
6953 | * It should only be called before L2 actually succeeded to run, and when | ||
6954 | * vmcs01 is current (it doesn't leave_guest_mode() or switch VMCSs). | ||
6955 | */ | ||
6956 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | ||
6957 | struct vmcs12 *vmcs12, | ||
6958 | u32 reason, unsigned long qualification) | ||
6959 | { | ||
6960 | load_vmcs12_host_state(vcpu, vmcs12); | ||
6961 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; | ||
6962 | vmcs12->exit_qualification = qualification; | ||
6963 | nested_vmx_succeed(vcpu); | ||
4515 | } | 6964 | } |
4516 | 6965 | ||
4517 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | 6966 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, |
@@ -4670,16 +7119,13 @@ static int __init vmx_init(void) | |||
4670 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7119 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
4671 | 7120 | ||
4672 | if (enable_ept) { | 7121 | if (enable_ept) { |
4673 | bypass_guest_pf = 0; | ||
4674 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 7122 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
4675 | VMX_EPT_EXECUTABLE_MASK); | 7123 | VMX_EPT_EXECUTABLE_MASK); |
7124 | ept_set_mmio_spte_mask(); | ||
4676 | kvm_enable_tdp(); | 7125 | kvm_enable_tdp(); |
4677 | } else | 7126 | } else |
4678 | kvm_disable_tdp(); | 7127 | kvm_disable_tdp(); |
4679 | 7128 | ||
4680 | if (bypass_guest_pf) | ||
4681 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | ||
4682 | |||
4683 | return 0; | 7129 | return 0; |
4684 | 7130 | ||
4685 | out3: | 7131 | out3: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 77c9d8673dc4..84a28ea45fa4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -347,6 +347,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
347 | vcpu->arch.cr2 = fault->address; | 347 | vcpu->arch.cr2 = fault->address; |
348 | kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); | 348 | kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); |
349 | } | 349 | } |
350 | EXPORT_SYMBOL_GPL(kvm_inject_page_fault); | ||
350 | 351 | ||
351 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | 352 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) |
352 | { | 353 | { |
@@ -579,6 +580,22 @@ static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | |||
579 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | 580 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); |
580 | } | 581 | } |
581 | 582 | ||
583 | static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | ||
584 | { | ||
585 | struct kvm_cpuid_entry2 *best; | ||
586 | |||
587 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
588 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | ||
589 | } | ||
590 | |||
591 | static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | ||
592 | { | ||
593 | struct kvm_cpuid_entry2 *best; | ||
594 | |||
595 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
596 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
597 | } | ||
598 | |||
582 | static void update_cpuid(struct kvm_vcpu *vcpu) | 599 | static void update_cpuid(struct kvm_vcpu *vcpu) |
583 | { | 600 | { |
584 | struct kvm_cpuid_entry2 *best; | 601 | struct kvm_cpuid_entry2 *best; |
@@ -598,14 +615,20 @@ static void update_cpuid(struct kvm_vcpu *vcpu) | |||
598 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 615 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
599 | { | 616 | { |
600 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 617 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
601 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 618 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | |
602 | 619 | X86_CR4_PAE | X86_CR4_SMEP; | |
603 | if (cr4 & CR4_RESERVED_BITS) | 620 | if (cr4 & CR4_RESERVED_BITS) |
604 | return 1; | 621 | return 1; |
605 | 622 | ||
606 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | 623 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) |
607 | return 1; | 624 | return 1; |
608 | 625 | ||
626 | if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) | ||
627 | return 1; | ||
628 | |||
629 | if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS)) | ||
630 | return 1; | ||
631 | |||
609 | if (is_long_mode(vcpu)) { | 632 | if (is_long_mode(vcpu)) { |
610 | if (!(cr4 & X86_CR4_PAE)) | 633 | if (!(cr4 & X86_CR4_PAE)) |
611 | return 1; | 634 | return 1; |
@@ -615,11 +638,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
615 | kvm_read_cr3(vcpu))) | 638 | kvm_read_cr3(vcpu))) |
616 | return 1; | 639 | return 1; |
617 | 640 | ||
618 | if (cr4 & X86_CR4_VMXE) | 641 | if (kvm_x86_ops->set_cr4(vcpu, cr4)) |
619 | return 1; | 642 | return 1; |
620 | 643 | ||
621 | kvm_x86_ops->set_cr4(vcpu, cr4); | ||
622 | |||
623 | if ((cr4 ^ old_cr4) & pdptr_bits) | 644 | if ((cr4 ^ old_cr4) & pdptr_bits) |
624 | kvm_mmu_reset_context(vcpu); | 645 | kvm_mmu_reset_context(vcpu); |
625 | 646 | ||
@@ -787,12 +808,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); | |||
787 | * kvm-specific. Those are put in the beginning of the list. | 808 | * kvm-specific. Those are put in the beginning of the list. |
788 | */ | 809 | */ |
789 | 810 | ||
790 | #define KVM_SAVE_MSRS_BEGIN 8 | 811 | #define KVM_SAVE_MSRS_BEGIN 9 |
791 | static u32 msrs_to_save[] = { | 812 | static u32 msrs_to_save[] = { |
792 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 813 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
793 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 814 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
794 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 815 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
795 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, | 816 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
796 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 817 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
797 | MSR_STAR, | 818 | MSR_STAR, |
798 | #ifdef CONFIG_X86_64 | 819 | #ifdef CONFIG_X86_64 |
@@ -1388,7 +1409,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1388 | return 1; | 1409 | return 1; |
1389 | kvm_x86_ops->patch_hypercall(vcpu, instructions); | 1410 | kvm_x86_ops->patch_hypercall(vcpu, instructions); |
1390 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ | 1411 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ |
1391 | if (copy_to_user((void __user *)addr, instructions, 4)) | 1412 | if (__copy_to_user((void __user *)addr, instructions, 4)) |
1392 | return 1; | 1413 | return 1; |
1393 | kvm->arch.hv_hypercall = data; | 1414 | kvm->arch.hv_hypercall = data; |
1394 | break; | 1415 | break; |
@@ -1415,7 +1436,7 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1415 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | 1436 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); |
1416 | if (kvm_is_error_hva(addr)) | 1437 | if (kvm_is_error_hva(addr)) |
1417 | return 1; | 1438 | return 1; |
1418 | if (clear_user((void __user *)addr, PAGE_SIZE)) | 1439 | if (__clear_user((void __user *)addr, PAGE_SIZE)) |
1419 | return 1; | 1440 | return 1; |
1420 | vcpu->arch.hv_vapic = data; | 1441 | vcpu->arch.hv_vapic = data; |
1421 | break; | 1442 | break; |
@@ -1467,6 +1488,35 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) | |||
1467 | } | 1488 | } |
1468 | } | 1489 | } |
1469 | 1490 | ||
1491 | static void accumulate_steal_time(struct kvm_vcpu *vcpu) | ||
1492 | { | ||
1493 | u64 delta; | ||
1494 | |||
1495 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | ||
1496 | return; | ||
1497 | |||
1498 | delta = current->sched_info.run_delay - vcpu->arch.st.last_steal; | ||
1499 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | ||
1500 | vcpu->arch.st.accum_steal = delta; | ||
1501 | } | ||
1502 | |||
1503 | static void record_steal_time(struct kvm_vcpu *vcpu) | ||
1504 | { | ||
1505 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | ||
1506 | return; | ||
1507 | |||
1508 | if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | ||
1509 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) | ||
1510 | return; | ||
1511 | |||
1512 | vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal; | ||
1513 | vcpu->arch.st.steal.version += 2; | ||
1514 | vcpu->arch.st.accum_steal = 0; | ||
1515 | |||
1516 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | ||
1517 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | ||
1518 | } | ||
1519 | |||
1470 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1520 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1471 | { | 1521 | { |
1472 | switch (msr) { | 1522 | switch (msr) { |
@@ -1549,6 +1599,33 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1549 | if (kvm_pv_enable_async_pf(vcpu, data)) | 1599 | if (kvm_pv_enable_async_pf(vcpu, data)) |
1550 | return 1; | 1600 | return 1; |
1551 | break; | 1601 | break; |
1602 | case MSR_KVM_STEAL_TIME: | ||
1603 | |||
1604 | if (unlikely(!sched_info_on())) | ||
1605 | return 1; | ||
1606 | |||
1607 | if (data & KVM_STEAL_RESERVED_MASK) | ||
1608 | return 1; | ||
1609 | |||
1610 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, | ||
1611 | data & KVM_STEAL_VALID_BITS)) | ||
1612 | return 1; | ||
1613 | |||
1614 | vcpu->arch.st.msr_val = data; | ||
1615 | |||
1616 | if (!(data & KVM_MSR_ENABLED)) | ||
1617 | break; | ||
1618 | |||
1619 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | ||
1620 | |||
1621 | preempt_disable(); | ||
1622 | accumulate_steal_time(vcpu); | ||
1623 | preempt_enable(); | ||
1624 | |||
1625 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ||
1626 | |||
1627 | break; | ||
1628 | |||
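The MSR handler above, together with accumulate_steal_time()/record_steal_time(), defines the host side of the steal-time interface. Below is a guest-side sketch of how the feature would be consumed; it assumes the shared area has the struct kvm_steal_time layout (u64 steal, u32 version, u32 flags, padding), that version only ever advances by 2 per update as in record_steal_time(), and that cpuid_eax()/wrmsrl() are placeholder privileged helpers, so the constants and calls are illustrative rather than taken from this patch.

#include <stdint.h>

/* Placeholders for privileged guest primitives (assumptions, not in this patch). */
extern uint32_t cpuid_eax(uint32_t leaf);
extern void wrmsrl(uint32_t msr, uint64_t val);

#define MSR_KVM_STEAL_TIME	0x4b564d03	/* assumed ABI value */
#define KVM_MSR_ENABLED		1
#define KVM_FEATURE_STEAL_TIME	5		/* assumed bit in CPUID 0x40000001.EAX */

struct steal_time_area {			/* mirrors struct kvm_steal_time */
	uint64_t steal;				/* stolen time, in nanoseconds */
	uint32_t version;			/* bumped by 2 on every host update */
	uint32_t flags;
	uint32_t pad[12];
} __attribute__((aligned(64)));

static volatile struct steal_time_area st_area;

static void enable_steal_time(uint64_t st_gpa)
{
	/* st_gpa is the guest physical address of the 64-byte aligned area. */
	if (cpuid_eax(0x40000001) & (1u << KVM_FEATURE_STEAL_TIME))
		wrmsrl(MSR_KVM_STEAL_TIME, st_gpa | KVM_MSR_ENABLED);
}

static uint64_t read_steal_ns(void)
{
	uint32_t v;
	uint64_t steal;

	/* Retry until the version is stable across the read. */
	do {
		v = st_area.version;
		__sync_synchronize();
		steal = st_area.steal;
		__sync_synchronize();
	} while (v != st_area.version);

	return steal;
}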
1552 | case MSR_IA32_MCG_CTL: | 1629 | case MSR_IA32_MCG_CTL: |
1553 | case MSR_IA32_MCG_STATUS: | 1630 | case MSR_IA32_MCG_STATUS: |
1554 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1631 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
@@ -1834,6 +1911,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1834 | case MSR_KVM_ASYNC_PF_EN: | 1911 | case MSR_KVM_ASYNC_PF_EN: |
1835 | data = vcpu->arch.apf.msr_val; | 1912 | data = vcpu->arch.apf.msr_val; |
1836 | break; | 1913 | break; |
1914 | case MSR_KVM_STEAL_TIME: | ||
1915 | data = vcpu->arch.st.msr_val; | ||
1916 | break; | ||
1837 | case MSR_IA32_P5_MC_ADDR: | 1917 | case MSR_IA32_P5_MC_ADDR: |
1838 | case MSR_IA32_P5_MC_TYPE: | 1918 | case MSR_IA32_P5_MC_TYPE: |
1839 | case MSR_IA32_MCG_CAP: | 1919 | case MSR_IA32_MCG_CAP: |
@@ -2145,6 +2225,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2145 | kvm_migrate_timers(vcpu); | 2225 | kvm_migrate_timers(vcpu); |
2146 | vcpu->cpu = cpu; | 2226 | vcpu->cpu = cpu; |
2147 | } | 2227 | } |
2228 | |||
2229 | accumulate_steal_time(vcpu); | ||
2230 | kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); | ||
2148 | } | 2231 | } |
2149 | 2232 | ||
2150 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 2233 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
@@ -2283,6 +2366,13 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2283 | entry->flags = 0; | 2366 | entry->flags = 0; |
2284 | } | 2367 | } |
2285 | 2368 | ||
2369 | static bool supported_xcr0_bit(unsigned bit) | ||
2370 | { | ||
2371 | u64 mask = ((u64)1 << bit); | ||
2372 | |||
2373 | return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0; | ||
2374 | } | ||
2375 | |||
2286 | #define F(x) bit(X86_FEATURE_##x) | 2376 | #define F(x) bit(X86_FEATURE_##x) |
2287 | 2377 | ||
2288 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 2378 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
@@ -2328,7 +2418,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2328 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 2418 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
2329 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 2419 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
2330 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | | 2420 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | |
2331 | F(F16C); | 2421 | F(F16C) | F(RDRAND); |
2332 | /* cpuid 0x80000001.ecx */ | 2422 | /* cpuid 0x80000001.ecx */ |
2333 | const u32 kvm_supported_word6_x86_features = | 2423 | const u32 kvm_supported_word6_x86_features = |
2334 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | 2424 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | |
@@ -2342,6 +2432,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2342 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | 2432 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | |
2343 | F(PMM) | F(PMM_EN); | 2433 | F(PMM) | F(PMM_EN); |
2344 | 2434 | ||
2435 | /* cpuid 7.0.ebx */ | ||
2436 | const u32 kvm_supported_word9_x86_features = | ||
2437 | F(SMEP) | F(FSGSBASE) | F(ERMS); | ||
2438 | |||
2345 | /* all calls to cpuid_count() should be made on the same cpu */ | 2439 | /* all calls to cpuid_count() should be made on the same cpu */ |
2346 | get_cpu(); | 2440 | get_cpu(); |
2347 | do_cpuid_1_ent(entry, function, index); | 2441 | do_cpuid_1_ent(entry, function, index); |
@@ -2376,7 +2470,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2376 | } | 2470 | } |
2377 | break; | 2471 | break; |
2378 | } | 2472 | } |
2379 | /* function 4 and 0xb have additional index. */ | 2473 | /* function 4 has additional index. */ |
2380 | case 4: { | 2474 | case 4: { |
2381 | int i, cache_type; | 2475 | int i, cache_type; |
2382 | 2476 | ||
@@ -2393,6 +2487,22 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2393 | } | 2487 | } |
2394 | break; | 2488 | break; |
2395 | } | 2489 | } |
2490 | case 7: { | ||
2491 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2492 | /* Mask ebx against host capability word 9 */ | ||
2493 | if (index == 0) { | ||
2494 | entry->ebx &= kvm_supported_word9_x86_features; | ||
2495 | cpuid_mask(&entry->ebx, 9); | ||
2496 | } else | ||
2497 | entry->ebx = 0; | ||
2498 | entry->eax = 0; | ||
2499 | entry->ecx = 0; | ||
2500 | entry->edx = 0; | ||
2501 | break; | ||
2502 | } | ||
2503 | case 9: | ||
2504 | break; | ||
2505 | /* function 0xb has additional index. */ | ||
2396 | case 0xb: { | 2506 | case 0xb: { |
2397 | int i, level_type; | 2507 | int i, level_type; |
2398 | 2508 | ||
@@ -2410,16 +2520,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2410 | break; | 2520 | break; |
2411 | } | 2521 | } |
2412 | case 0xd: { | 2522 | case 0xd: { |
2413 | int i; | 2523 | int idx, i; |
2414 | 2524 | ||
2415 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 2525 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
2416 | for (i = 1; *nent < maxnent && i < 64; ++i) { | 2526 | for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) { |
2417 | if (entry[i].eax == 0) | 2527 | do_cpuid_1_ent(&entry[i], function, idx); |
2528 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | ||
2418 | continue; | 2529 | continue; |
2419 | do_cpuid_1_ent(&entry[i], function, i); | ||
2420 | entry[i].flags |= | 2530 | entry[i].flags |= |
2421 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 2531 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
2422 | ++*nent; | 2532 | ++*nent; |
2533 | ++i; | ||
2423 | } | 2534 | } |
2424 | break; | 2535 | break; |
2425 | } | 2536 | } |
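The reworked CPUID 0xd loop above only advertises XSAVE sub-leaves whose state component is allowed by supported_xcr0_bit(), i.e. is one of FP/SSE/YMM and is enabled in the host's XCR0. A small stand-alone model of that filter is shown below; the XSTATE_* bit positions follow the architectural XCR0 layout, and sample_host_xcr0 is a made-up value, not the host_xcr0 variable from this file.

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP	(1ULL << 0)	/* x87 state */
#define XSTATE_SSE	(1ULL << 1)	/* SSE (XMM) state */
#define XSTATE_YMM	(1ULL << 2)	/* AVX (YMM) state */

/* Sub-leaf idx of CPUID 0xd describes XCR0 bit idx; only advertise it if
 * both KVM and the host's XCR0 support that state component. */
static int xcr0_subleaf_supported(unsigned idx, uint64_t host_xcr0)
{
	uint64_t mask = 1ULL << idx;

	return !!(mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0);
}

int main(void)
{
	uint64_t sample_host_xcr0 = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;

	printf("sub-leaf 2 (YMM): %d\n", xcr0_subleaf_supported(2, sample_host_xcr0)); /* 1 */
	printf("sub-leaf 5:       %d\n", xcr0_subleaf_supported(5, sample_host_xcr0)); /* 0 */
	return 0;
}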
@@ -2438,6 +2549,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2438 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 2549 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
2439 | (1 << KVM_FEATURE_ASYNC_PF) | | 2550 | (1 << KVM_FEATURE_ASYNC_PF) | |
2440 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 2551 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
2552 | |||
2553 | if (sched_info_on()) | ||
2554 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
2555 | |||
2441 | entry->ebx = 0; | 2556 | entry->ebx = 0; |
2442 | entry->ecx = 0; | 2557 | entry->ecx = 0; |
2443 | entry->edx = 0; | 2558 | entry->edx = 0; |
@@ -2451,6 +2566,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2451 | entry->ecx &= kvm_supported_word6_x86_features; | 2566 | entry->ecx &= kvm_supported_word6_x86_features; |
2452 | cpuid_mask(&entry->ecx, 6); | 2567 | cpuid_mask(&entry->ecx, 6); |
2453 | break; | 2568 | break; |
2569 | case 0x80000008: { | ||
2570 | unsigned g_phys_as = (entry->eax >> 16) & 0xff; | ||
2571 | unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); | ||
2572 | unsigned phys_as = entry->eax & 0xff; | ||
2573 | |||
2574 | if (!g_phys_as) | ||
2575 | g_phys_as = phys_as; | ||
2576 | entry->eax = g_phys_as | (virt_as << 8); | ||
2577 | entry->ebx = entry->edx = 0; | ||
2578 | break; | ||
2579 | } | ||
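The new 0x80000008 case controls the address widths reported to the guest: bits 7:0 carry the physical-address width, bits 15:8 the virtual width (raised to at least 48), and bits 23:16 the guest-physical width, which falls back to the plain physical width when the CPU reports 0. A worked example with a hypothetical host EAX value:

#include <stdio.h>

int main(void)
{
	/* Hypothetical host CPUID 0x80000008.EAX: 36 physical bits,
	 * 48 virtual bits, no separate guest-physical field. */
	unsigned eax = (48u << 8) | 36u;

	unsigned g_phys_as = (eax >> 16) & 0xff;	/* 0 */
	unsigned virt_as   = (eax >> 8) & 0xff;		/* 48 */
	unsigned phys_as   = eax & 0xff;		/* 36 */

	if (virt_as < 48)
		virt_as = 48;
	if (!g_phys_as)
		g_phys_as = phys_as;			/* report 36 to the guest */

	printf("guest EAX = %#x\n", g_phys_as | (virt_as << 8));	/* 0x3024 */
	return 0;
}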
2580 | case 0x80000019: | ||
2581 | entry->ecx = entry->edx = 0; | ||
2582 | break; | ||
2583 | case 0x8000001a: | ||
2584 | break; | ||
2585 | case 0x8000001d: | ||
2586 | break; | ||
2454 | /*Add support for Centaur's CPUID instruction*/ | 2587 | /*Add support for Centaur's CPUID instruction*/ |
2455 | case 0xC0000000: | 2588 | case 0xC0000000: |
2456 | /*Just support up to 0xC0000004 now*/ | 2589 | /*Just support up to 0xC0000004 now*/ |
@@ -2460,10 +2593,16 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2460 | entry->edx &= kvm_supported_word5_x86_features; | 2593 | entry->edx &= kvm_supported_word5_x86_features; |
2461 | cpuid_mask(&entry->edx, 5); | 2594 | cpuid_mask(&entry->edx, 5); |
2462 | break; | 2595 | break; |
2596 | case 3: /* Processor serial number */ | ||
2597 | case 5: /* MONITOR/MWAIT */ | ||
2598 | case 6: /* Thermal management */ | ||
2599 | case 0xA: /* Architectural Performance Monitoring */ | ||
2600 | case 0x80000007: /* Advanced power management */ | ||
2463 | case 0xC0000002: | 2601 | case 0xC0000002: |
2464 | case 0xC0000003: | 2602 | case 0xC0000003: |
2465 | case 0xC0000004: | 2603 | case 0xC0000004: |
2466 | /*Now nothing to do, reserved for the future*/ | 2604 | default: |
2605 | entry->eax = entry->ebx = entry->ecx = entry->edx = 0; | ||
2467 | break; | 2606 | break; |
2468 | } | 2607 | } |
2469 | 2608 | ||
@@ -3817,7 +3956,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, | |||
3817 | exception); | 3956 | exception); |
3818 | } | 3957 | } |
3819 | 3958 | ||
3820 | static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, | 3959 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
3821 | gva_t addr, void *val, unsigned int bytes, | 3960 | gva_t addr, void *val, unsigned int bytes, |
3822 | struct x86_exception *exception) | 3961 | struct x86_exception *exception) |
3823 | { | 3962 | { |
@@ -3827,6 +3966,7 @@ static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, | |||
3827 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3966 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3828 | exception); | 3967 | exception); |
3829 | } | 3968 | } |
3969 | EXPORT_SYMBOL_GPL(kvm_read_guest_virt); | ||
3830 | 3970 | ||
3831 | static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, | 3971 | static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3832 | gva_t addr, void *val, unsigned int bytes, | 3972 | gva_t addr, void *val, unsigned int bytes, |
@@ -3836,7 +3976,7 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
3836 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | 3976 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); |
3837 | } | 3977 | } |
3838 | 3978 | ||
3839 | static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | 3979 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3840 | gva_t addr, void *val, | 3980 | gva_t addr, void *val, |
3841 | unsigned int bytes, | 3981 | unsigned int bytes, |
3842 | struct x86_exception *exception) | 3982 | struct x86_exception *exception) |
@@ -3868,6 +4008,42 @@ static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
3868 | out: | 4008 | out: |
3869 | return r; | 4009 | return r; |
3870 | } | 4010 | } |
4011 | EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); | ||
4012 | |||
4013 | static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | ||
4014 | gpa_t *gpa, struct x86_exception *exception, | ||
4015 | bool write) | ||
4016 | { | ||
4017 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
4018 | |||
4019 | if (vcpu_match_mmio_gva(vcpu, gva) && | ||
4020 | check_write_user_access(vcpu, write, access, | ||
4021 | vcpu->arch.access)) { | ||
4022 | *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | | ||
4023 | (gva & (PAGE_SIZE - 1)); | ||
4024 | trace_vcpu_match_mmio(gva, *gpa, write, false); | ||
4025 | return 1; | ||
4026 | } | ||
4027 | |||
4028 | if (write) | ||
4029 | access |= PFERR_WRITE_MASK; | ||
4030 | |||
4031 | *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); | ||
4032 | |||
4033 | if (*gpa == UNMAPPED_GVA) | ||
4034 | return -1; | ||
4035 | |||
4036 | /* For APIC access vmexit */ | ||
4037 | if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
4038 | return 1; | ||
4039 | |||
4040 | if (vcpu_match_mmio_gpa(vcpu, *gpa)) { | ||
4041 | trace_vcpu_match_mmio(gva, *gpa, write, true); | ||
4042 | return 1; | ||
4043 | } | ||
4044 | |||
4045 | return 0; | ||
4046 | } | ||
3871 | 4047 | ||
3872 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | 4048 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, |
3873 | unsigned long addr, | 4049 | unsigned long addr, |
@@ -3876,8 +4052,8 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | |||
3876 | struct x86_exception *exception) | 4052 | struct x86_exception *exception) |
3877 | { | 4053 | { |
3878 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4054 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3879 | gpa_t gpa; | 4055 | gpa_t gpa; |
3880 | int handled; | 4056 | int handled, ret; |
3881 | 4057 | ||
3882 | if (vcpu->mmio_read_completed) { | 4058 | if (vcpu->mmio_read_completed) { |
3883 | memcpy(val, vcpu->mmio_data, bytes); | 4059 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3887,13 +4063,12 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, | |||
3887 | return X86EMUL_CONTINUE; | 4063 | return X86EMUL_CONTINUE; |
3888 | } | 4064 | } |
3889 | 4065 | ||
3890 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception); | 4066 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false); |
3891 | 4067 | ||
3892 | if (gpa == UNMAPPED_GVA) | 4068 | if (ret < 0) |
3893 | return X86EMUL_PROPAGATE_FAULT; | 4069 | return X86EMUL_PROPAGATE_FAULT; |
3894 | 4070 | ||
3895 | /* For APIC access vmexit */ | 4071 | if (ret) |
3896 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
3897 | goto mmio; | 4072 | goto mmio; |
3898 | 4073 | ||
3899 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) | 4074 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) |
@@ -3944,16 +4119,16 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
3944 | struct x86_exception *exception, | 4119 | struct x86_exception *exception, |
3945 | struct kvm_vcpu *vcpu) | 4120 | struct kvm_vcpu *vcpu) |
3946 | { | 4121 | { |
3947 | gpa_t gpa; | 4122 | gpa_t gpa; |
3948 | int handled; | 4123 | int handled, ret; |
3949 | 4124 | ||
3950 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); | 4125 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true); |
3951 | 4126 | ||
3952 | if (gpa == UNMAPPED_GVA) | 4127 | if (ret < 0) |
3953 | return X86EMUL_PROPAGATE_FAULT; | 4128 | return X86EMUL_PROPAGATE_FAULT; |
3954 | 4129 | ||
3955 | /* For APIC access vmexit */ | 4130 | /* For APIC access vmexit */ |
3956 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 4131 | if (ret) |
3957 | goto mmio; | 4132 | goto mmio; |
3958 | 4133 | ||
3959 | if (emulator_write_phys(vcpu, gpa, val, bytes)) | 4134 | if (emulator_write_phys(vcpu, gpa, val, bytes)) |
@@ -4473,9 +4648,24 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) | |||
4473 | kvm_queue_exception(vcpu, ctxt->exception.vector); | 4648 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4474 | } | 4649 | } |
4475 | 4650 | ||
4651 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt, | ||
4652 | const unsigned long *regs) | ||
4653 | { | ||
4654 | memset(&ctxt->twobyte, 0, | ||
4655 | (void *)&ctxt->regs - (void *)&ctxt->twobyte); | ||
4656 | memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); | ||
4657 | |||
4658 | ctxt->fetch.start = 0; | ||
4659 | ctxt->fetch.end = 0; | ||
4660 | ctxt->io_read.pos = 0; | ||
4661 | ctxt->io_read.end = 0; | ||
4662 | ctxt->mem_read.pos = 0; | ||
4663 | ctxt->mem_read.end = 0; | ||
4664 | } | ||
4665 | |||
4476 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | 4666 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) |
4477 | { | 4667 | { |
4478 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4668 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4479 | int cs_db, cs_l; | 4669 | int cs_db, cs_l; |
4480 | 4670 | ||
4481 | /* | 4671 | /* |
@@ -4488,40 +4678,38 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | |||
4488 | 4678 | ||
4489 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 4679 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4490 | 4680 | ||
4491 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 4681 | ctxt->eflags = kvm_get_rflags(vcpu); |
4492 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | 4682 | ctxt->eip = kvm_rip_read(vcpu); |
4493 | vcpu->arch.emulate_ctxt.mode = | 4683 | ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
4494 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 4684 | (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : |
4495 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 4685 | cs_l ? X86EMUL_MODE_PROT64 : |
4496 | ? X86EMUL_MODE_VM86 : cs_l | 4686 | cs_db ? X86EMUL_MODE_PROT32 : |
4497 | ? X86EMUL_MODE_PROT64 : cs_db | 4687 | X86EMUL_MODE_PROT16; |
4498 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 4688 | ctxt->guest_mode = is_guest_mode(vcpu); |
4499 | vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); | 4689 | |
4500 | memset(c, 0, sizeof(struct decode_cache)); | 4690 | init_decode_cache(ctxt, vcpu->arch.regs); |
4501 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4502 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | 4691 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; |
4503 | } | 4692 | } |
4504 | 4693 | ||
4505 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | 4694 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) |
4506 | { | 4695 | { |
4507 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4696 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4508 | int ret; | 4697 | int ret; |
4509 | 4698 | ||
4510 | init_emulate_ctxt(vcpu); | 4699 | init_emulate_ctxt(vcpu); |
4511 | 4700 | ||
4512 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; | 4701 | ctxt->op_bytes = 2; |
4513 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; | 4702 | ctxt->ad_bytes = 2; |
4514 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + | 4703 | ctxt->_eip = ctxt->eip + inc_eip; |
4515 | inc_eip; | 4704 | ret = emulate_int_real(ctxt, irq); |
4516 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); | ||
4517 | 4705 | ||
4518 | if (ret != X86EMUL_CONTINUE) | 4706 | if (ret != X86EMUL_CONTINUE) |
4519 | return EMULATE_FAIL; | 4707 | return EMULATE_FAIL; |
4520 | 4708 | ||
4521 | vcpu->arch.emulate_ctxt.eip = c->eip; | 4709 | ctxt->eip = ctxt->_eip; |
4522 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4710 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); |
4523 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4711 | kvm_rip_write(vcpu, ctxt->eip); |
4524 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 4712 | kvm_set_rflags(vcpu, ctxt->eflags); |
4525 | 4713 | ||
4526 | if (irq == NMI_VECTOR) | 4714 | if (irq == NMI_VECTOR) |
4527 | vcpu->arch.nmi_pending = false; | 4715 | vcpu->arch.nmi_pending = false; |
@@ -4582,21 +4770,21 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4582 | int insn_len) | 4770 | int insn_len) |
4583 | { | 4771 | { |
4584 | int r; | 4772 | int r; |
4585 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4773 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
4586 | bool writeback = true; | 4774 | bool writeback = true; |
4587 | 4775 | ||
4588 | kvm_clear_exception_queue(vcpu); | 4776 | kvm_clear_exception_queue(vcpu); |
4589 | 4777 | ||
4590 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4778 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
4591 | init_emulate_ctxt(vcpu); | 4779 | init_emulate_ctxt(vcpu); |
4592 | vcpu->arch.emulate_ctxt.interruptibility = 0; | 4780 | ctxt->interruptibility = 0; |
4593 | vcpu->arch.emulate_ctxt.have_exception = false; | 4781 | ctxt->have_exception = false; |
4594 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4782 | ctxt->perm_ok = false; |
4595 | 4783 | ||
4596 | vcpu->arch.emulate_ctxt.only_vendor_specific_insn | 4784 | ctxt->only_vendor_specific_insn |
4597 | = emulation_type & EMULTYPE_TRAP_UD; | 4785 | = emulation_type & EMULTYPE_TRAP_UD; |
4598 | 4786 | ||
4599 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); | 4787 | r = x86_decode_insn(ctxt, insn, insn_len); |
4600 | 4788 | ||
4601 | trace_kvm_emulate_insn_start(vcpu); | 4789 | trace_kvm_emulate_insn_start(vcpu); |
4602 | ++vcpu->stat.insn_emulation; | 4790 | ++vcpu->stat.insn_emulation; |
@@ -4612,7 +4800,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4612 | } | 4800 | } |
4613 | 4801 | ||
4614 | if (emulation_type & EMULTYPE_SKIP) { | 4802 | if (emulation_type & EMULTYPE_SKIP) { |
4615 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); | 4803 | kvm_rip_write(vcpu, ctxt->_eip); |
4616 | return EMULATE_DONE; | 4804 | return EMULATE_DONE; |
4617 | } | 4805 | } |
4618 | 4806 | ||
@@ -4620,11 +4808,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4620 | changes registers values during IO operation */ | 4808 | changes registers values during IO operation */ |
4621 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { | 4809 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
4622 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | 4810 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; |
4623 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4811 | memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs); |
4624 | } | 4812 | } |
4625 | 4813 | ||
4626 | restart: | 4814 | restart: |
4627 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); | 4815 | r = x86_emulate_insn(ctxt); |
4628 | 4816 | ||
4629 | if (r == EMULATION_INTERCEPTED) | 4817 | if (r == EMULATION_INTERCEPTED) |
4630 | return EMULATE_DONE; | 4818 | return EMULATE_DONE; |
@@ -4636,7 +4824,7 @@ restart: | |||
4636 | return handle_emulation_failure(vcpu); | 4824 | return handle_emulation_failure(vcpu); |
4637 | } | 4825 | } |
4638 | 4826 | ||
4639 | if (vcpu->arch.emulate_ctxt.have_exception) { | 4827 | if (ctxt->have_exception) { |
4640 | inject_emulated_exception(vcpu); | 4828 | inject_emulated_exception(vcpu); |
4641 | r = EMULATE_DONE; | 4829 | r = EMULATE_DONE; |
4642 | } else if (vcpu->arch.pio.count) { | 4830 | } else if (vcpu->arch.pio.count) { |
@@ -4655,13 +4843,12 @@ restart: | |||
4655 | r = EMULATE_DONE; | 4843 | r = EMULATE_DONE; |
4656 | 4844 | ||
4657 | if (writeback) { | 4845 | if (writeback) { |
4658 | toggle_interruptibility(vcpu, | 4846 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
4659 | vcpu->arch.emulate_ctxt.interruptibility); | 4847 | kvm_set_rflags(vcpu, ctxt->eflags); |
4660 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
4661 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 4848 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
4662 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 4849 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); |
4663 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 4850 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
4664 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 4851 | kvm_rip_write(vcpu, ctxt->eip); |
4665 | } else | 4852 | } else |
4666 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | 4853 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; |
4667 | 4854 | ||
@@ -4878,6 +5065,30 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | |||
4878 | } | 5065 | } |
4879 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | 5066 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); |
4880 | 5067 | ||
5068 | static void kvm_set_mmio_spte_mask(void) | ||
5069 | { | ||
5070 | u64 mask; | ||
5071 | int maxphyaddr = boot_cpu_data.x86_phys_bits; | ||
5072 | |||
5073 | /* | ||
5074 | * Set the reserved bits and the present bit of a paging-structure | ||
5075 | * entry to generate a page fault with PFER.RSV = 1. | ||
5076 | */ | ||
5077 | mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr; | ||
5078 | mask |= 1ull; | ||
5079 | |||
5080 | #ifdef CONFIG_X86_64 | ||
5081 | /* | ||
5082 | * If no reserved bits are available, clear the present bit to disable | ||
5083 | * the mmio page fault. | ||
5084 | */ | ||
5085 | if (maxphyaddr == 52) | ||
5086 | mask &= ~1ull; | ||
5087 | #endif | ||
5088 | |||
5089 | kvm_mmu_set_mmio_spte_mask(mask); | ||
5090 | } | ||
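To make the resulting bit pattern concrete, here is a stand-alone rerun of the same computation for two hypothetical MAXPHYADDR values; the values in the comments are what kvm_set_mmio_spte_mask() would hand to kvm_mmu_set_mmio_spte_mask().

#include <stdio.h>
#include <stdint.h>

static uint64_t mmio_spte_mask(int maxphyaddr)
{
	/* Reserved physical-address bits maxphyaddr..62, plus the present bit. */
	uint64_t mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;

	mask |= 1ull;
	if (maxphyaddr == 52)		/* no reserved bits left: drop present */
		mask &= ~1ull;
	return mask;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)mmio_spte_mask(40));	/* 0x7fffff0000000001 */
	printf("%#llx\n", (unsigned long long)mmio_spte_mask(52));	/* 0x7ff0000000000000 */
	return 0;
}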
5091 | |||
4881 | int kvm_arch_init(void *opaque) | 5092 | int kvm_arch_init(void *opaque) |
4882 | { | 5093 | { |
4883 | int r; | 5094 | int r; |
@@ -4904,10 +5115,10 @@ int kvm_arch_init(void *opaque) | |||
4904 | if (r) | 5115 | if (r) |
4905 | goto out; | 5116 | goto out; |
4906 | 5117 | ||
5118 | kvm_set_mmio_spte_mask(); | ||
4907 | kvm_init_msr_list(); | 5119 | kvm_init_msr_list(); |
4908 | 5120 | ||
4909 | kvm_x86_ops = ops; | 5121 | kvm_x86_ops = ops; |
4910 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | ||
4911 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 5122 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
4912 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 5123 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
4913 | 5124 | ||
@@ -5082,8 +5293,7 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) | |||
5082 | 5293 | ||
5083 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 5294 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
5084 | 5295 | ||
5085 | return emulator_write_emulated(&vcpu->arch.emulate_ctxt, | 5296 | return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); |
5086 | rip, instruction, 3, NULL); | ||
5087 | } | 5297 | } |
5088 | 5298 | ||
5089 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 5299 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
@@ -5384,6 +5594,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5384 | r = 1; | 5594 | r = 1; |
5385 | goto out; | 5595 | goto out; |
5386 | } | 5596 | } |
5597 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) | ||
5598 | record_steal_time(vcpu); | ||
5599 | |||
5387 | } | 5600 | } |
5388 | 5601 | ||
5389 | r = kvm_mmu_reload(vcpu); | 5602 | r = kvm_mmu_reload(vcpu); |
@@ -5671,8 +5884,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
5671 | * that usually, but some bad designed PV devices (vmware | 5884 | * that usually, but some bad designed PV devices (vmware |
5672 | * backdoor interface) need this to work | 5885 | * backdoor interface) need this to work |
5673 | */ | 5886 | */ |
5674 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 5887 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
5675 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 5888 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); |
5676 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5889 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5677 | } | 5890 | } |
5678 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 5891 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
@@ -5801,21 +6014,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
5801 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 6014 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
5802 | bool has_error_code, u32 error_code) | 6015 | bool has_error_code, u32 error_code) |
5803 | { | 6016 | { |
5804 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 6017 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
5805 | int ret; | 6018 | int ret; |
5806 | 6019 | ||
5807 | init_emulate_ctxt(vcpu); | 6020 | init_emulate_ctxt(vcpu); |
5808 | 6021 | ||
5809 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, | 6022 | ret = emulator_task_switch(ctxt, tss_selector, reason, |
5810 | tss_selector, reason, has_error_code, | 6023 | has_error_code, error_code); |
5811 | error_code); | ||
5812 | 6024 | ||
5813 | if (ret) | 6025 | if (ret) |
5814 | return EMULATE_FAIL; | 6026 | return EMULATE_FAIL; |
5815 | 6027 | ||
5816 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 6028 | memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); |
5817 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 6029 | kvm_rip_write(vcpu, ctxt->eip); |
5818 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 6030 | kvm_set_rflags(vcpu, ctxt->eflags); |
5819 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6031 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5820 | return EMULATE_DONE; | 6032 | return EMULATE_DONE; |
5821 | } | 6033 | } |
@@ -6093,12 +6305,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6093 | if (r == 0) | 6305 | if (r == 0) |
6094 | r = kvm_mmu_setup(vcpu); | 6306 | r = kvm_mmu_setup(vcpu); |
6095 | vcpu_put(vcpu); | 6307 | vcpu_put(vcpu); |
6096 | if (r < 0) | ||
6097 | goto free_vcpu; | ||
6098 | 6308 | ||
6099 | return 0; | ||
6100 | free_vcpu: | ||
6101 | kvm_x86_ops->vcpu_free(vcpu); | ||
6102 | return r; | 6309 | return r; |
6103 | } | 6310 | } |
6104 | 6311 | ||
@@ -6126,6 +6333,7 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6126 | 6333 | ||
6127 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6334 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6128 | vcpu->arch.apf.msr_val = 0; | 6335 | vcpu->arch.apf.msr_val = 0; |
6336 | vcpu->arch.st.msr_val = 0; | ||
6129 | 6337 | ||
6130 | kvmclock_reset(vcpu); | 6338 | kvmclock_reset(vcpu); |
6131 | 6339 | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e407ed3df817..d36fe237c665 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -75,10 +75,54 @@ static inline u32 bit(int bitno) | |||
75 | return 1 << (bitno & 31); | 75 | return 1 << (bitno & 31); |
76 | } | 76 | } |
77 | 77 | ||
78 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | ||
79 | gva_t gva, gfn_t gfn, unsigned access) | ||
80 | { | ||
81 | vcpu->arch.mmio_gva = gva & PAGE_MASK; | ||
82 | vcpu->arch.access = access; | ||
83 | vcpu->arch.mmio_gfn = gfn; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Clear the mmio cache info for the given gva; | ||
88 | * in particular, if gva is ~0ul, we clear all mmio cache info. | ||
89 | */ | ||
90 | static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) | ||
91 | { | ||
92 | if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) | ||
93 | return; | ||
94 | |||
95 | vcpu->arch.mmio_gva = 0; | ||
96 | } | ||
97 | |||
98 | static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) | ||
99 | { | ||
100 | if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) | ||
101 | return true; | ||
102 | |||
103 | return false; | ||
104 | } | ||
105 | |||
106 | static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) | ||
107 | { | ||
108 | if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) | ||
109 | return true; | ||
110 | |||
111 | return false; | ||
112 | } | ||
113 | |||
78 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 114 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
79 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 115 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
80 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | 116 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
81 | 117 | ||
82 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); | 118 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data); |
83 | 119 | ||
120 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, | ||
121 | gva_t addr, void *val, unsigned int bytes, | ||
122 | struct x86_exception *exception); | ||
123 | |||
124 | int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | ||
125 | gva_t addr, void *val, unsigned int bytes, | ||
126 | struct x86_exception *exception); | ||
127 | |||
84 | #endif | 128 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index db832fd65ecb..13ee258442ae 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -71,7 +71,8 @@ | |||
71 | #include <asm/stackprotector.h> | 71 | #include <asm/stackprotector.h> |
72 | #include <asm/reboot.h> /* for struct machine_ops */ | 72 | #include <asm/reboot.h> /* for struct machine_ops */ |
73 | 73 | ||
74 | /*G:010 Welcome to the Guest! | 74 | /*G:010 |
75 | * Welcome to the Guest! | ||
75 | * | 76 | * |
76 | * The Guest in our tale is a simple creature: identical to the Host but | 77 | * The Guest in our tale is a simple creature: identical to the Host but |
77 | * behaving in simplified but equivalent ways. In particular, the Guest is the | 78 | * behaving in simplified but equivalent ways. In particular, the Guest is the |
@@ -190,15 +191,23 @@ static void lazy_hcall4(unsigned long call, | |||
190 | #endif | 191 | #endif |
191 | 192 | ||
192 | /*G:036 | 193 | /*G:036 |
193 | * When lazy mode is turned off reset the per-cpu lazy mode variable and then | 194 | * When lazy mode is turned off, we issue the do-nothing hypercall to |
194 | * issue the do-nothing hypercall to flush any stored calls. | 195 | * flush any stored calls, and call the generic helper to reset the |
195 | :*/ | 196 | * per-cpu lazy mode variable. |
197 | */ | ||
196 | static void lguest_leave_lazy_mmu_mode(void) | 198 | static void lguest_leave_lazy_mmu_mode(void) |
197 | { | 199 | { |
198 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); | 200 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); |
199 | paravirt_leave_lazy_mmu(); | 201 | paravirt_leave_lazy_mmu(); |
200 | } | 202 | } |
201 | 203 | ||
204 | /* | ||
205 | * We also catch the end of context switch; we enter lazy mode for much of | ||
206 | * that too, so again we need to flush here. | ||
207 | * | ||
208 | * (Technically, this is lazy CPU mode, and normally we're in lazy MMU | ||
209 | * mode, but unlike Xen, lguest doesn't care about the difference). | ||
210 | */ | ||
202 | static void lguest_end_context_switch(struct task_struct *next) | 211 | static void lguest_end_context_switch(struct task_struct *next) |
203 | { | 212 | { |
204 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); | 213 | hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); |
@@ -391,7 +400,7 @@ static void lguest_load_tr_desc(void) | |||
391 | * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. | 400 | * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. |
392 | * | 401 | * |
393 | * This instruction even has its own Wikipedia entry. The Wikipedia entry | 402 |
394 | * has been translated into 5 languages. I am not making this up! | 403 | * has been translated into 6 languages. I am not making this up! |
395 | * | 404 | * |
396 | * We could get funky here and identify ourselves as "GenuineLguest", but | 405 | * We could get funky here and identify ourselves as "GenuineLguest", but |
397 | * instead we just use the real "cpuid" instruction. Then I pretty much turned | 406 | * instead we just use the real "cpuid" instruction. Then I pretty much turned |
@@ -458,7 +467,7 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
458 | /* | 467 | /* |
459 | * PAE systems can mark pages as non-executable. Linux calls this the | 468 | * PAE systems can mark pages as non-executable. Linux calls this the |
460 | * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced | 469 | * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced |
461 | * Virus Protection). We just switch turn if off here, since we don't | 470 | * Virus Protection). We just switch it off here, since we don't |
462 | * support it. | 471 | * support it. |
463 | */ | 472 | */ |
464 | case 0x80000001: | 473 | case 0x80000001: |
@@ -520,17 +529,16 @@ static unsigned long lguest_read_cr2(void) | |||
520 | 529 | ||
521 | /* See lguest_set_pte() below. */ | 530 | /* See lguest_set_pte() below. */ |
522 | static bool cr3_changed = false; | 531 | static bool cr3_changed = false; |
532 | static unsigned long current_cr3; | ||
523 | 533 | ||
524 | /* | 534 | /* |
525 | * cr3 is the current toplevel pagetable page: the principle is the same as | 535 | * cr3 is the current toplevel pagetable page: the principle is the same as |
526 | * cr0. Keep a local copy, and tell the Host when it changes. The only | 536 | * cr0. Keep a local copy, and tell the Host when it changes. |
527 | * difference is that our local copy is in lguest_data because the Host needs | ||
528 | * to set it upon our initial hypercall. | ||
529 | */ | 537 | */ |
530 | static void lguest_write_cr3(unsigned long cr3) | 538 | static void lguest_write_cr3(unsigned long cr3) |
531 | { | 539 | { |
532 | lguest_data.pgdir = cr3; | ||
533 | lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); | 540 | lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); |
541 | current_cr3 = cr3; | ||
534 | 542 | ||
535 | /* These two page tables are simple, linear, and used during boot */ | 543 | /* These two page tables are simple, linear, and used during boot */ |
536 | if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) | 544 | if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) |
@@ -539,7 +547,7 @@ static void lguest_write_cr3(unsigned long cr3) | |||
539 | 547 | ||
540 | static unsigned long lguest_read_cr3(void) | 548 | static unsigned long lguest_read_cr3(void) |
541 | { | 549 | { |
542 | return lguest_data.pgdir; | 550 | return current_cr3; |
543 | } | 551 | } |
544 | 552 | ||
545 | /* cr4 is used to enable and disable PGE, but we don't care. */ | 553 | /* cr4 is used to enable and disable PGE, but we don't care. */ |
@@ -641,7 +649,7 @@ static void lguest_write_cr4(unsigned long val) | |||
641 | 649 | ||
642 | /* | 650 | /* |
643 | * The Guest calls this after it has set a second-level entry (pte), ie. to map | 651 | * The Guest calls this after it has set a second-level entry (pte), ie. to map |
644 | * a page into a process' address space. Wetell the Host the toplevel and | 652 | * a page into a process' address space. We tell the Host the toplevel and |
645 | * address this corresponds to. The Guest uses one pagetable per process, so | 653 | * address this corresponds to. The Guest uses one pagetable per process, so |
646 | * we need to tell the Host which one we're changing (mm->pgd). | 654 | * we need to tell the Host which one we're changing (mm->pgd). |
647 | */ | 655 | */ |
@@ -758,7 +766,7 @@ static void lguest_pmd_clear(pmd_t *pmdp) | |||
758 | static void lguest_flush_tlb_single(unsigned long addr) | 766 | static void lguest_flush_tlb_single(unsigned long addr) |
759 | { | 767 | { |
760 | /* Simply set it to zero: if it was not, it will fault back in. */ | 768 | /* Simply set it to zero: if it was not, it will fault back in. */ |
761 | lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); | 769 | lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0); |
762 | } | 770 | } |
763 | 771 | ||
764 | /* | 772 | /* |
@@ -1140,7 +1148,7 @@ static struct notifier_block paniced = { | |||
1140 | static __init char *lguest_memory_setup(void) | 1148 | static __init char *lguest_memory_setup(void) |
1141 | { | 1149 | { |
1142 | /* | 1150 | /* |
1143 | *The Linux bootloader header contains an "e820" memory map: the | 1151 | * The Linux bootloader header contains an "e820" memory map: the |
1144 | * Launcher populated the first entry with our memory limit. | 1152 | * Launcher populated the first entry with our memory limit. |
1145 | */ | 1153 | */ |
1146 | e820_add_region(boot_params.e820_map[0].addr, | 1154 | e820_add_region(boot_params.e820_map[0].addr, |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 4f420c2f2d55..6ddfe4fc23c3 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -6,18 +6,22 @@ | |||
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | 7 | ||
8 | /*G:020 | 8 | /*G:020 |
9 | * Our story starts with the kernel booting into startup_32 in | 9 | |
10 | * arch/x86/kernel/head_32.S. It expects a boot header, which is created by | 10 | * Our story starts with the bzImage: booting starts at startup_32 in |
11 | * the bootloader (the Launcher in our case). | 11 | * arch/x86/boot/compressed/head_32.S. This merely uncompresses the real |
12 | * kernel in place and then jumps into it: startup_32 in | ||
13 | * arch/x86/kernel/head_32.S. Both routines expects a boot header in the %esi | ||
14 | * register, which is created by the bootloader (the Launcher in our case). | ||
12 | * | 15 | * |
13 | * The startup_32 function does very little: it clears the uninitialized global | 16 | * The startup_32 function does very little: it clears the uninitialized global |
14 | * C variables which we expect to be zero (ie. BSS) and then copies the boot | 17 | * C variables which we expect to be zero (ie. BSS) and then copies the boot |
15 | * header and kernel command line somewhere safe. Finally it checks the | 18 | * header and kernel command line somewhere safe, and populates some initial |
16 | * 'hardware_subarch' field. This was introduced in 2.6.24 for lguest and Xen: | 19 | * page tables. Finally it checks the 'hardware_subarch' field. This was |
17 | * if it's set to '1' (lguest's assigned number), then it calls us here. | 20 | * introduced in 2.6.24 for lguest and Xen: if it's set to '1' (lguest's |
21 | * assigned number), then it calls us here. | ||
18 | * | 22 | * |
19 | * WARNING: be very careful here! We're running at addresses equal to physical | 23 | * WARNING: be very careful here! We're running at addresses equal to physical |
20 | * addesses (around 0), not above PAGE_OFFSET as most code expectes | 24 | * addresses (around 0), not above PAGE_OFFSET as most code expects |
21 | * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any | 25 | * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any |
22 | * data without remembering to subtract __PAGE_OFFSET! | 26 | * data without remembering to subtract __PAGE_OFFSET! |
23 | * | 27 | * |
@@ -27,13 +31,18 @@ | |||
27 | .section .init.text, "ax", @progbits | 31 | .section .init.text, "ax", @progbits |
28 | ENTRY(lguest_entry) | 32 | ENTRY(lguest_entry) |
29 | /* | 33 | /* |
30 | * We make the "initialization" hypercall now to tell the Host about | 34 | * We make the "initialization" hypercall now to tell the Host where |
31 | * us, and also find out where it put our page tables. | 35 | * our lguest_data struct is. |
32 | */ | 36 | */ |
33 | movl $LHCALL_LGUEST_INIT, %eax | 37 | movl $LHCALL_LGUEST_INIT, %eax |
34 | movl $lguest_data - __PAGE_OFFSET, %ebx | 38 | movl $lguest_data - __PAGE_OFFSET, %ebx |
35 | int $LGUEST_TRAP_ENTRY | 39 | int $LGUEST_TRAP_ENTRY |
36 | 40 | ||
41 | /* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */ | ||
42 | movl $LHCALL_NEW_PGTABLE, %eax | ||
43 | movl $(initial_page_table - __PAGE_OFFSET), %ebx | ||
44 | int $LGUEST_TRAP_ENTRY | ||
45 | |||
37 | /* Set up the initial stack so we can run C code. */ | 46 | /* Set up the initial stack so we can run C code. */ |
38 | movl $(init_thread_union+THREAD_SIZE),%esp | 47 | movl $(init_thread_union+THREAD_SIZE),%esp |
39 | 48 | ||
@@ -96,12 +105,8 @@ send_interrupts: | |||
96 | */ | 105 | */ |
97 | pushl %eax | 106 | pushl %eax |
98 | movl $LHCALL_SEND_INTERRUPTS, %eax | 107 | movl $LHCALL_SEND_INTERRUPTS, %eax |
99 | /* | 108 | /* This is the actual hypercall trap. */ |
100 | * This is a vmcall instruction (same thing that KVM uses). Older | 109 | int $LGUEST_TRAP_ENTRY |
101 | * assembler versions might not know the "vmcall" instruction, so we | ||
102 | * create one manually here. | ||
103 | */ | ||
104 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | ||
105 | /* Put eax back the way we found it. */ | 110 | /* Put eax back the way we found it. */ |
106 | popl %eax | 111 | popl %eax |
107 | ret | 112 | ret |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f2479f19ddde..b00f6785da74 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -18,8 +18,10 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o | |||
18 | 18 | ||
19 | lib-y := delay.o | 19 | lib-y := delay.o |
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-$(CONFIG_SMP) += rwlock.o | ||
24 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | ||
23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o | 25 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
24 | 26 | ||
25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 27 | obj-y += msr.o msr-reg.o msr-reg-export.o |
@@ -29,7 +31,7 @@ ifeq ($(CONFIG_X86_32),y) | |||
29 | lib-y += atomic64_cx8_32.o | 31 | lib-y += atomic64_cx8_32.o |
30 | lib-y += checksum_32.o | 32 | lib-y += checksum_32.o |
31 | lib-y += strstr_32.o | 33 | lib-y += strstr_32.o |
32 | lib-y += semaphore_32.o string_32.o | 34 | lib-y += string_32.o |
33 | lib-y += cmpxchg.o | 35 | lib-y += cmpxchg.o |
34 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 36 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
35 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o | 37 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
@@ -40,7 +42,6 @@ else | |||
40 | lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o | 42 | lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o |
41 | lib-y += thunk_64.o clear_page_64.o copy_page_64.o | 43 | lib-y += thunk_64.o clear_page_64.o copy_page_64.o |
42 | lib-y += memmove_64.o memset_64.o | 44 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 45 | lib-y += copy_user_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | ||
45 | lib-y += cmpxchg16b_emu.o | 46 | lib-y += cmpxchg16b_emu.o |
46 | endif | 47 | endif |
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 6fec2d1cebe1..01c805ba5359 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S | |||
@@ -2,6 +2,7 @@ | |||
2 | 2 | ||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | 4 | #include <asm/dwarf2.h> |
5 | #include <asm/alternative-asm.h> | ||
5 | 6 | ||
6 | ALIGN | 7 | ALIGN |
7 | copy_page_c: | 8 | copy_page_c: |
@@ -110,10 +111,6 @@ ENDPROC(copy_page) | |||
110 | 2: | 111 | 2: |
111 | .previous | 112 | .previous |
112 | .section .altinstructions,"a" | 113 | .section .altinstructions,"a" |
113 | .align 8 | 114 | altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \ |
114 | .quad copy_page | 115 | .Lcopy_page_end-copy_page, 2b-1b |
115 | .quad 1b | ||
116 | .word X86_FEATURE_REP_GOOD | ||
117 | .byte .Lcopy_page_end - copy_page | ||
118 | .byte 2b - 1b | ||
119 | .previous | 116 | .previous |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index d0ec9c2936d7..ee164610ec46 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
11 | #include <asm/cpufeature.h> | 11 | #include <asm/cpufeature.h> |
12 | #include <asm/alternative-asm.h> | ||
12 | 13 | ||
13 | #undef memmove | 14 | #undef memmove |
14 | 15 | ||
@@ -214,11 +215,9 @@ ENTRY(memmove) | |||
214 | .previous | 215 | .previous |
215 | 216 | ||
216 | .section .altinstructions,"a" | 217 | .section .altinstructions,"a" |
217 | .align 8 | 218 | altinstruction_entry .Lmemmove_begin_forward, \ |
218 | .quad .Lmemmove_begin_forward | 219 | .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \ |
219 | .quad .Lmemmove_begin_forward_efs | 220 | .Lmemmove_end_forward-.Lmemmove_begin_forward, \ |
220 | .word X86_FEATURE_ERMS | 221 | .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs |
221 | .byte .Lmemmove_end_forward-.Lmemmove_begin_forward | ||
222 | .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs | ||
223 | .previous | 222 | .previous |
224 | ENDPROC(memmove) | 223 | ENDPROC(memmove) |
diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S new file mode 100644 index 000000000000..1cad22139c88 --- /dev/null +++ b/arch/x86/lib/rwlock.S | |||
@@ -0,0 +1,44 @@ | |||
1 | /* Slow paths of read/write spinlocks. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/alternative-asm.h> | ||
5 | #include <asm/frame.h> | ||
6 | #include <asm/rwlock.h> | ||
7 | |||
8 | #ifdef CONFIG_X86_32 | ||
9 | # define __lock_ptr eax | ||
10 | #else | ||
11 | # define __lock_ptr rdi | ||
12 | #endif | ||
13 | |||
14 | ENTRY(__write_lock_failed) | ||
15 | CFI_STARTPROC | ||
16 | FRAME | ||
17 | 0: LOCK_PREFIX | ||
18 | WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) | ||
19 | 1: rep; nop | ||
20 | cmpl $WRITE_LOCK_CMP, (%__lock_ptr) | ||
21 | jne 1b | ||
22 | LOCK_PREFIX | ||
23 | WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) | ||
24 | jnz 0b | ||
25 | ENDFRAME | ||
26 | ret | ||
27 | CFI_ENDPROC | ||
28 | END(__write_lock_failed) | ||
29 | |||
30 | ENTRY(__read_lock_failed) | ||
31 | CFI_STARTPROC | ||
32 | FRAME | ||
33 | 0: LOCK_PREFIX | ||
34 | READ_LOCK_SIZE(inc) (%__lock_ptr) | ||
35 | 1: rep; nop | ||
36 | READ_LOCK_SIZE(cmp) $1, (%__lock_ptr) | ||
37 | js 1b | ||
38 | LOCK_PREFIX | ||
39 | READ_LOCK_SIZE(dec) (%__lock_ptr) | ||
40 | js 0b | ||
41 | ENDFRAME | ||
42 | ret | ||
43 | CFI_ENDPROC | ||
44 | END(__read_lock_failed) | ||
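The new arch/x86/lib/rwlock.S above carries only the contended slow paths; the uncontended fast paths remain inline asm and branch to __read_lock_failed/__write_lock_failed after a failed atomic attempt. A rough C-level sketch of the bias protocol those paths implement follows — the constant and function names are illustrative, not the asm/rwlock.h interface, and the real 32/64-bit operand sizes are hidden behind the WRITE_LOCK_*/READ_LOCK_SIZE macros used in the file above.

#include <linux/atomic.h>
#include <asm/processor.h>	/* cpu_relax() */

#define SKETCH_RW_BIAS	0x00100000	/* stands in for RW_LOCK_BIAS */

/* lock starts at SKETCH_RW_BIAS: each reader takes 1, a writer takes it all */
static void sketch_write_lock(atomic_t *lock)
{
	while (atomic_sub_return(SKETCH_RW_BIAS, lock) != 0) {
		/* __write_lock_failed: undo, wait until fully free, retry */
		atomic_add(SKETCH_RW_BIAS, lock);
		while (atomic_read(lock) != SKETCH_RW_BIAS)
			cpu_relax();
	}
}

static void sketch_read_lock(atomic_t *lock)
{
	while (atomic_dec_return(lock) < 0) {
		/* __read_lock_failed: undo, wait for the writer to leave, retry */
		atomic_inc(lock);
		while (atomic_read(lock) < 1)
			cpu_relax();
	}
}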
diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S deleted file mode 100644 index 05ea55f71405..000000000000 --- a/arch/x86/lib/rwlock_64.S +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | /* Slow paths of read/write spinlocks. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/rwlock.h> | ||
5 | #include <asm/alternative-asm.h> | ||
6 | #include <asm/dwarf2.h> | ||
7 | |||
8 | /* rdi: pointer to rwlock_t */ | ||
9 | ENTRY(__write_lock_failed) | ||
10 | CFI_STARTPROC | ||
11 | LOCK_PREFIX | ||
12 | addl $RW_LOCK_BIAS,(%rdi) | ||
13 | 1: rep | ||
14 | nop | ||
15 | cmpl $RW_LOCK_BIAS,(%rdi) | ||
16 | jne 1b | ||
17 | LOCK_PREFIX | ||
18 | subl $RW_LOCK_BIAS,(%rdi) | ||
19 | jnz __write_lock_failed | ||
20 | ret | ||
21 | CFI_ENDPROC | ||
22 | END(__write_lock_failed) | ||
23 | |||
24 | /* rdi: pointer to rwlock_t */ | ||
25 | ENTRY(__read_lock_failed) | ||
26 | CFI_STARTPROC | ||
27 | LOCK_PREFIX | ||
28 | incl (%rdi) | ||
29 | 1: rep | ||
30 | nop | ||
31 | cmpl $1,(%rdi) | ||
32 | js 1b | ||
33 | LOCK_PREFIX | ||
34 | decl (%rdi) | ||
35 | js __read_lock_failed | ||
36 | ret | ||
37 | CFI_ENDPROC | ||
38 | END(__read_lock_failed) | ||
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem.S index 67743977398b..5dff5f042468 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem.S | |||
@@ -1,4 +1,51 @@ | |||
1 | /* | 1 | /* |
2 | * x86 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/linkage.h> | ||
17 | #include <asm/alternative-asm.h> | ||
18 | #include <asm/dwarf2.h> | ||
19 | |||
20 | #define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) | ||
21 | #define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) | ||
22 | |||
23 | #ifdef CONFIG_X86_32 | ||
24 | |||
25 | /* | ||
26 | * The semaphore operations have a special calling sequence that | ||
27 | * allow us to do a simpler in-line version of them. These routines | ||
28 | * need to convert that sequence back into the C sequence when | ||
29 | * there is contention on the semaphore. | ||
30 | * | ||
31 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
32 | * registers (%eax, %edx and %ecx) except %eax which is either a return | ||
33 | * value or just clobbered. | ||
34 | */ | ||
35 | |||
36 | #define save_common_regs \ | ||
37 | pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 | ||
38 | |||
39 | #define restore_common_regs \ | ||
40 | popl_cfi %ecx; CFI_RESTORE ecx | ||
41 | |||
42 | /* Avoid uglifying the argument copying x86-64 needs to do. */ | ||
43 | .macro movq src, dst | ||
44 | .endm | ||
45 | |||
46 | #else | ||
47 | |||
48 | /* | ||
2 | * x86-64 rwsem wrappers | 49 | * x86-64 rwsem wrappers |
3 | * | 50 | * |
4 | * This interfaces the inline asm code to the slow-path | 51 | * This interfaces the inline asm code to the slow-path |
@@ -16,12 +63,6 @@ | |||
16 | * but %rdi, %rsi, %rcx, %r8-r11 always need saving. | 63 | * but %rdi, %rsi, %rcx, %r8-r11 always need saving. |
17 | */ | 64 | */ |
18 | 65 | ||
19 | #include <linux/linkage.h> | ||
20 | #include <asm/rwlock.h> | ||
21 | #include <asm/alternative-asm.h> | ||
22 | #include <asm/frame.h> | ||
23 | #include <asm/dwarf2.h> | ||
24 | |||
25 | #define save_common_regs \ | 66 | #define save_common_regs \ |
26 | pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ | 67 | pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ |
27 | pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ | 68 | pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ |
@@ -40,16 +81,18 @@ | |||
40 | popq_cfi %rsi; CFI_RESTORE rsi; \ | 81 | popq_cfi %rsi; CFI_RESTORE rsi; \ |
41 | popq_cfi %rdi; CFI_RESTORE rdi | 82 | popq_cfi %rdi; CFI_RESTORE rdi |
42 | 83 | ||
84 | #endif | ||
85 | |||
43 | /* Fix up special calling conventions */ | 86 | /* Fix up special calling conventions */ |
44 | ENTRY(call_rwsem_down_read_failed) | 87 | ENTRY(call_rwsem_down_read_failed) |
45 | CFI_STARTPROC | 88 | CFI_STARTPROC |
46 | save_common_regs | 89 | save_common_regs |
47 | pushq_cfi %rdx | 90 | __ASM_SIZE(push,_cfi) %__ASM_REG(dx) |
48 | CFI_REL_OFFSET rdx, 0 | 91 | CFI_REL_OFFSET __ASM_REG(dx), 0 |
49 | movq %rax,%rdi | 92 | movq %rax,%rdi |
50 | call rwsem_down_read_failed | 93 | call rwsem_down_read_failed |
51 | popq_cfi %rdx | 94 | __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) |
52 | CFI_RESTORE rdx | 95 | CFI_RESTORE __ASM_REG(dx) |
53 | restore_common_regs | 96 | restore_common_regs |
54 | ret | 97 | ret |
55 | CFI_ENDPROC | 98 | CFI_ENDPROC |
@@ -67,7 +110,8 @@ ENDPROC(call_rwsem_down_write_failed) | |||
67 | 110 | ||
68 | ENTRY(call_rwsem_wake) | 111 | ENTRY(call_rwsem_wake) |
69 | CFI_STARTPROC | 112 | CFI_STARTPROC |
70 | decl %edx /* do nothing if still outstanding active readers */ | 113 | /* do nothing if still outstanding active readers */ |
114 | __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) | ||
71 | jnz 1f | 115 | jnz 1f |
72 | save_common_regs | 116 | save_common_regs |
73 | movq %rax,%rdi | 117 | movq %rax,%rdi |
@@ -77,16 +121,15 @@ ENTRY(call_rwsem_wake) | |||
77 | CFI_ENDPROC | 121 | CFI_ENDPROC |
78 | ENDPROC(call_rwsem_wake) | 122 | ENDPROC(call_rwsem_wake) |
79 | 123 | ||
80 | /* Fix up special calling conventions */ | ||
81 | ENTRY(call_rwsem_downgrade_wake) | 124 | ENTRY(call_rwsem_downgrade_wake) |
82 | CFI_STARTPROC | 125 | CFI_STARTPROC |
83 | save_common_regs | 126 | save_common_regs |
84 | pushq_cfi %rdx | 127 | __ASM_SIZE(push,_cfi) %__ASM_REG(dx) |
85 | CFI_REL_OFFSET rdx, 0 | 128 | CFI_REL_OFFSET __ASM_REG(dx), 0 |
86 | movq %rax,%rdi | 129 | movq %rax,%rdi |
87 | call rwsem_downgrade_wake | 130 | call rwsem_downgrade_wake |
88 | popq_cfi %rdx | 131 | __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) |
89 | CFI_RESTORE rdx | 132 | CFI_RESTORE __ASM_REG(dx) |
90 | restore_common_regs | 133 | restore_common_regs |
91 | ret | 134 | ret |
92 | CFI_ENDPROC | 135 | CFI_ENDPROC |
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S deleted file mode 100644 index 06691daa4108..000000000000 --- a/arch/x86/lib/semaphore_32.S +++ /dev/null | |||
@@ -1,124 +0,0 @@ | |||
1 | /* | ||
2 | * i386 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/linkage.h> | ||
17 | #include <asm/rwlock.h> | ||
18 | #include <asm/alternative-asm.h> | ||
19 | #include <asm/frame.h> | ||
20 | #include <asm/dwarf2.h> | ||
21 | |||
22 | /* | ||
23 | * The semaphore operations have a special calling sequence that | ||
24 | * allow us to do a simpler in-line version of them. These routines | ||
25 | * need to convert that sequence back into the C sequence when | ||
26 | * there is contention on the semaphore. | ||
27 | * | ||
28 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
29 | * registers (%eax, %edx and %ecx) except %eax whish is either a return | ||
30 | * value or just clobbered.. | ||
31 | */ | ||
32 | .section .sched.text, "ax" | ||
33 | |||
34 | /* | ||
35 | * rw spinlock fallbacks | ||
36 | */ | ||
37 | #ifdef CONFIG_SMP | ||
38 | ENTRY(__write_lock_failed) | ||
39 | CFI_STARTPROC | ||
40 | FRAME | ||
41 | 2: LOCK_PREFIX | ||
42 | addl $ RW_LOCK_BIAS,(%eax) | ||
43 | 1: rep; nop | ||
44 | cmpl $ RW_LOCK_BIAS,(%eax) | ||
45 | jne 1b | ||
46 | LOCK_PREFIX | ||
47 | subl $ RW_LOCK_BIAS,(%eax) | ||
48 | jnz 2b | ||
49 | ENDFRAME | ||
50 | ret | ||
51 | CFI_ENDPROC | ||
52 | ENDPROC(__write_lock_failed) | ||
53 | |||
54 | ENTRY(__read_lock_failed) | ||
55 | CFI_STARTPROC | ||
56 | FRAME | ||
57 | 2: LOCK_PREFIX | ||
58 | incl (%eax) | ||
59 | 1: rep; nop | ||
60 | cmpl $1,(%eax) | ||
61 | js 1b | ||
62 | LOCK_PREFIX | ||
63 | decl (%eax) | ||
64 | js 2b | ||
65 | ENDFRAME | ||
66 | ret | ||
67 | CFI_ENDPROC | ||
68 | ENDPROC(__read_lock_failed) | ||
69 | |||
70 | #endif | ||
71 | |||
72 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
73 | |||
74 | /* Fix up special calling conventions */ | ||
75 | ENTRY(call_rwsem_down_read_failed) | ||
76 | CFI_STARTPROC | ||
77 | pushl_cfi %ecx | ||
78 | CFI_REL_OFFSET ecx,0 | ||
79 | pushl_cfi %edx | ||
80 | CFI_REL_OFFSET edx,0 | ||
81 | call rwsem_down_read_failed | ||
82 | popl_cfi %edx | ||
83 | popl_cfi %ecx | ||
84 | ret | ||
85 | CFI_ENDPROC | ||
86 | ENDPROC(call_rwsem_down_read_failed) | ||
87 | |||
88 | ENTRY(call_rwsem_down_write_failed) | ||
89 | CFI_STARTPROC | ||
90 | pushl_cfi %ecx | ||
91 | CFI_REL_OFFSET ecx,0 | ||
92 | calll rwsem_down_write_failed | ||
93 | popl_cfi %ecx | ||
94 | ret | ||
95 | CFI_ENDPROC | ||
96 | ENDPROC(call_rwsem_down_write_failed) | ||
97 | |||
98 | ENTRY(call_rwsem_wake) | ||
99 | CFI_STARTPROC | ||
100 | decw %dx /* do nothing if still outstanding active readers */ | ||
101 | jnz 1f | ||
102 | pushl_cfi %ecx | ||
103 | CFI_REL_OFFSET ecx,0 | ||
104 | call rwsem_wake | ||
105 | popl_cfi %ecx | ||
106 | 1: ret | ||
107 | CFI_ENDPROC | ||
108 | ENDPROC(call_rwsem_wake) | ||
109 | |||
110 | /* Fix up special calling conventions */ | ||
111 | ENTRY(call_rwsem_downgrade_wake) | ||
112 | CFI_STARTPROC | ||
113 | pushl_cfi %ecx | ||
114 | CFI_REL_OFFSET ecx,0 | ||
115 | pushl_cfi %edx | ||
116 | CFI_REL_OFFSET edx,0 | ||
117 | call rwsem_downgrade_wake | ||
118 | popl_cfi %edx | ||
119 | popl_cfi %ecx | ||
120 | ret | ||
121 | CFI_ENDPROC | ||
122 | ENDPROC(call_rwsem_downgrade_wake) | ||
123 | |||
124 | #endif | ||
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 782b082c9ff7..a63efd6bb6a5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -5,50 +5,41 @@ | |||
5 | * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. | 5 | * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. |
6 | * Subject to the GNU public license, v.2. No warranty of any kind. | 6 | * Subject to the GNU public license, v.2. No warranty of any kind. |
7 | */ | 7 | */ |
8 | #include <linux/linkage.h> | ||
9 | #include <asm/dwarf2.h> | ||
10 | #include <asm/calling.h> | ||
8 | 11 | ||
9 | #include <linux/linkage.h> | 12 | /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ |
10 | #include <asm/dwarf2.h> | 13 | .macro THUNK name, func, put_ret_addr_in_rdi=0 |
11 | #include <asm/calling.h> | ||
12 | #include <asm/rwlock.h> | ||
13 | |||
14 | /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ | ||
15 | .macro thunk name,func | ||
16 | .globl \name | ||
17 | \name: | ||
18 | CFI_STARTPROC | ||
19 | SAVE_ARGS | ||
20 | call \func | ||
21 | jmp restore | ||
22 | CFI_ENDPROC | ||
23 | .endm | ||
24 | |||
25 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
26 | /* put return address in rdi (arg1) */ | ||
27 | .macro thunk_ra name,func | ||
28 | .globl \name | 14 | .globl \name |
29 | \name: | 15 | \name: |
30 | CFI_STARTPROC | 16 | CFI_STARTPROC |
17 | |||
18 | /* this one pushes 9 elems, the next one would be %rIP */ | ||
31 | SAVE_ARGS | 19 | SAVE_ARGS |
32 | /* SAVE_ARGS pushs 9 elements */ | 20 | |
33 | /* the next element would be the rip */ | 21 | .if \put_ret_addr_in_rdi |
34 | movq 9*8(%rsp), %rdi | 22 | movq_cfi_restore 9*8, rdi |
23 | .endif | ||
24 | |||
35 | call \func | 25 | call \func |
36 | jmp restore | 26 | jmp restore |
37 | CFI_ENDPROC | 27 | CFI_ENDPROC |
38 | .endm | 28 | .endm |
39 | 29 | ||
40 | thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller | 30 | #ifdef CONFIG_TRACE_IRQFLAGS |
41 | thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller | 31 | THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 |
32 | THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 | ||
42 | #endif | 33 | #endif |
43 | 34 | ||
44 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 35 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
45 | thunk lockdep_sys_exit_thunk,lockdep_sys_exit | 36 | THUNK lockdep_sys_exit_thunk,lockdep_sys_exit |
46 | #endif | 37 | #endif |
47 | 38 | ||
48 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ | 39 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ |
49 | CFI_STARTPROC | 40 | CFI_STARTPROC |
50 | SAVE_ARGS | 41 | SAVE_ARGS |
51 | restore: | 42 | restore: |
52 | RESTORE_ARGS | 43 | RESTORE_ARGS |
53 | ret | 44 | ret |
54 | CFI_ENDPROC | 45 | CFI_ENDPROC |
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c new file mode 100644 index 000000000000..97be9cb54483 --- /dev/null +++ b/arch/x86/lib/usercopy.c | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * User address space access functions. | ||
3 | * | ||
4 | * For licencing details see kernel-base/COPYING | ||
5 | */ | ||
6 | |||
7 | #include <linux/highmem.h> | ||
8 | #include <linux/module.h> | ||
9 | |||
10 | /* | ||
11 | * best effort, GUP based copy_from_user() that is NMI-safe | ||
12 | */ | ||
13 | unsigned long | ||
14 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
15 | { | ||
16 | unsigned long offset, addr = (unsigned long)from; | ||
17 | unsigned long size, len = 0; | ||
18 | struct page *page; | ||
19 | void *map; | ||
20 | int ret; | ||
21 | |||
22 | do { | ||
23 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
24 | if (!ret) | ||
25 | break; | ||
26 | |||
27 | offset = addr & (PAGE_SIZE - 1); | ||
28 | size = min(PAGE_SIZE - offset, n - len); | ||
29 | |||
30 | map = kmap_atomic(page); | ||
31 | memcpy(to, map+offset, size); | ||
32 | kunmap_atomic(map); | ||
33 | put_page(page); | ||
34 | |||
35 | len += size; | ||
36 | to += size; | ||
37 | addr += size; | ||
38 | |||
39 | } while (len < n); | ||
40 | |||
41 | return len; | ||
42 | } | ||
43 | EXPORT_SYMBOL_GPL(copy_from_user_nmi); | ||
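The new copy_from_user_nmi() above is usable from NMI context because it never sleeps and never takes mm locks: each user page is resolved with __get_user_pages_fast(), mapped with kmap_atomic(), and the function returns the number of bytes it actually managed to copy. A hypothetical caller, loosely modelled on the oprofile backtrace conversion later in this diff, might look like the sketch below; the struct and helper names are made up for illustration.

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Prototype as defined above; the header that exports it is not shown here. */
extern unsigned long copy_from_user_nmi(void *to, const void __user *from,
					unsigned long n);

/* Illustrative frame layout, modelled on (but not identical to) the
 * stack_frame structs used by the oprofile/perf backtrace code. */
struct sketch_frame {
	struct sketch_frame __user *next_frame;
	unsigned long return_address;
};

/* Copy one user stack frame from NMI context; returns 0 on success. */
static int sketch_read_user_frame(struct sketch_frame __user *head,
				  struct sketch_frame *out)
{
	unsigned long bytes = copy_from_user_nmi(out, head, sizeof(*out));

	return bytes == sizeof(*out) ? 0 : -EFAULT;
}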
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2dbf6bf4c7e5..4d09df054e39 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1059 | if (unlikely(error_code & PF_RSVD)) | 1059 | if (unlikely(error_code & PF_RSVD)) |
1060 | pgtable_bad(regs, error_code, address); | 1060 | pgtable_bad(regs, error_code, address); |
1061 | 1061 | ||
1062 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | 1062 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
1063 | 1063 | ||
1064 | /* | 1064 | /* |
1065 | * If we're in an interrupt, have no user context or are running | 1065 | * If we're in an interrupt, have no user context or are running |
@@ -1161,11 +1161,11 @@ good_area: | |||
1161 | if (flags & FAULT_FLAG_ALLOW_RETRY) { | 1161 | if (flags & FAULT_FLAG_ALLOW_RETRY) { |
1162 | if (fault & VM_FAULT_MAJOR) { | 1162 | if (fault & VM_FAULT_MAJOR) { |
1163 | tsk->maj_flt++; | 1163 | tsk->maj_flt++; |
1164 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, | 1164 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, |
1165 | regs, address); | 1165 | regs, address); |
1166 | } else { | 1166 | } else { |
1167 | tsk->min_flt++; | 1167 | tsk->min_flt++; |
1168 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | 1168 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, |
1169 | regs, address); | 1169 | regs, address); |
1170 | } | 1170 | } |
1171 | if (fault & VM_FAULT_RETRY) { | 1171 | if (fault & VM_FAULT_RETRY) { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d865c4aeec55..bbaaa005bf0e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/poison.h> | 28 | #include <linux/poison.h> |
29 | #include <linux/dma-mapping.h> | 29 | #include <linux/dma-mapping.h> |
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/memory.h> | ||
31 | #include <linux/memory_hotplug.h> | 32 | #include <linux/memory_hotplug.h> |
32 | #include <linux/nmi.h> | 33 | #include <linux/nmi.h> |
33 | #include <linux/gfp.h> | 34 | #include <linux/gfp.h> |
@@ -895,8 +896,6 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
895 | } | 896 | } |
896 | 897 | ||
897 | #ifdef CONFIG_X86_UV | 898 | #ifdef CONFIG_X86_UV |
898 | #define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) | ||
899 | |||
900 | unsigned long memory_block_size_bytes(void) | 899 | unsigned long memory_block_size_bytes(void) |
901 | { | 900 | { |
902 | if (is_uv_system()) { | 901 | if (is_uv_system()) { |
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 704a37cedddb..dab41876cdd5 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, | |||
185 | e->trace.entries = e->trace_entries; | 185 | e->trace.entries = e->trace_entries; |
186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | 186 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); |
187 | e->trace.skip = 0; | 187 | e->trace.skip = 0; |
188 | save_stack_trace_regs(&e->trace, regs); | 188 | save_stack_trace_regs(regs, &e->trace); |
189 | 189 | ||
190 | /* Round address down to nearest 16 bytes */ | 190 | /* Round address down to nearest 16 bytes */ |
191 | shadow_copy = kmemcheck_shadow_lookup(address | 191 | shadow_copy = kmemcheck_shadow_lookup(address |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index f5510d889a22..fbeaaf416610 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -496,6 +496,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | |||
496 | 496 | ||
497 | static int __init numa_register_memblks(struct numa_meminfo *mi) | 497 | static int __init numa_register_memblks(struct numa_meminfo *mi) |
498 | { | 498 | { |
499 | unsigned long uninitialized_var(pfn_align); | ||
499 | int i, nid; | 500 | int i, nid; |
500 | 501 | ||
501 | /* Account for nodes with cpus and no memory */ | 502 | /* Account for nodes with cpus and no memory */ |
@@ -511,6 +512,20 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
511 | 512 | ||
512 | /* for out of order entries */ | 513 | /* for out of order entries */ |
513 | sort_node_map(); | 514 | sort_node_map(); |
515 | |||
516 | /* | ||
517 | * If sections array is gonna be used for pfn -> nid mapping, check | ||
518 | * whether its granularity is fine enough. | ||
519 | */ | ||
520 | #ifdef NODE_NOT_IN_PAGE_FLAGS | ||
521 | pfn_align = node_map_pfn_alignment(); | ||
522 | if (pfn_align && pfn_align < PAGES_PER_SECTION) { | ||
523 | printk(KERN_WARNING "Node alignment %LuMB < min %LuMB, rejecting NUMA config\n", | ||
524 | PFN_PHYS(pfn_align) >> 20, | ||
525 | PFN_PHYS(PAGES_PER_SECTION) >> 20); | ||
526 | return -EINVAL; | ||
527 | } | ||
528 | #endif | ||
514 | if (!numa_meminfo_cover_memory(mi)) | 529 | if (!numa_meminfo_cover_memory(mi)) |
515 | return -EINVAL; | 530 | return -EINVAL; |
516 | 531 | ||
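The new pfn_align check in numa_register_memblks() matters because, with NODE_NOT_IN_PAGE_FLAGS, page_to_nid() is answered from the sparsemem section array rather than from page->flags, at a granularity of one node id per PAGES_PER_SECTION pfns; a NUMA layout with finer node boundaries cannot be represented and has to be rejected. Roughly, the lookup it must stay compatible with looks like the sketch below (illustrative names, not the real mmzone.h code).

#include <linux/mmzone.h>

/* Stand-in for mm/sparse.c's section_to_node_table[]. */
static s8 sketch_section_to_node[NR_MEM_SECTIONS];

/* One node id is recorded per memory section, so a pfn's node is just
 * its section's entry; boundaries finer than PAGES_PER_SECTION vanish. */
static int sketch_pfn_to_nid(unsigned long pfn)
{
	return sketch_section_to_node[pfn >> PFN_SECTION_SHIFT];
}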
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 849a975d3fa0..3adebe7e536a 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -41,7 +41,7 @@ | |||
41 | * physnode_map[16-31] = 1; | 41 | * physnode_map[16-31] = 1; |
42 | * physnode_map[32- ] = -1; | 42 | * physnode_map[32- ] = -1; |
43 | */ | 43 | */ |
44 | s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; | 44 | s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1}; |
45 | EXPORT_SYMBOL(physnode_map); | 45 | EXPORT_SYMBOL(physnode_map); |
46 | 46 | ||
47 | void memory_present(int nid, unsigned long start, unsigned long end) | 47 | void memory_present(int nid, unsigned long start, unsigned long end) |
@@ -52,8 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
52 | nid, start, end); | 52 | nid, start, end); |
53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); | 53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); |
54 | printk(KERN_DEBUG " "); | 54 | printk(KERN_DEBUG " "); |
55 | for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { | 55 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
56 | physnode_map[pfn / PAGES_PER_ELEMENT] = nid; | 56 | physnode_map[pfn / PAGES_PER_SECTION] = nid; |
57 | printk(KERN_CONT "%lx ", pfn); | 57 | printk(KERN_CONT "%lx ", pfn); |
58 | } | 58 | } |
59 | printk(KERN_CONT "\n"); | 59 | printk(KERN_CONT "\n"); |
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index e1d106909218..b0086567271c 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
@@ -123,12 +123,11 @@ static int pageattr_test(void) | |||
123 | if (print) | 123 | if (print) |
124 | printk(KERN_INFO "CPA self-test:\n"); | 124 | printk(KERN_INFO "CPA self-test:\n"); |
125 | 125 | ||
126 | bm = vmalloc((max_pfn_mapped + 7) / 8); | 126 | bm = vzalloc((max_pfn_mapped + 7) / 8); |
127 | if (!bm) { | 127 | if (!bm) { |
128 | printk(KERN_ERR "CPA Cannot vmalloc bitmap\n"); | 128 | printk(KERN_ERR "CPA Cannot vmalloc bitmap\n"); |
129 | return -ENOMEM; | 129 | return -ENOMEM; |
130 | } | 130 | } |
131 | memset(bm, 0, (max_pfn_mapped + 7) / 8); | ||
132 | 131 | ||
133 | failed += print_split(&sa); | 132 | failed += print_split(&sa); |
134 | srandom32(100); | 133 | srandom32(100); |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index a5b64ab4cd6e..bff89dfe3619 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -11,10 +11,11 @@ | |||
11 | #include <linux/oprofile.h> | 11 | #include <linux/oprofile.h> |
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
14 | #include <linux/compat.h> | ||
15 | #include <linux/uaccess.h> | ||
16 | |||
14 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
15 | #include <asm/uaccess.h> | ||
16 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
17 | #include <linux/compat.h> | ||
18 | 19 | ||
19 | static int backtrace_stack(void *data, char *name) | 20 | static int backtrace_stack(void *data, char *name) |
20 | { | 21 | { |
@@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = { | |||
40 | static struct stack_frame_ia32 * | 41 | static struct stack_frame_ia32 * |
41 | dump_user_backtrace_32(struct stack_frame_ia32 *head) | 42 | dump_user_backtrace_32(struct stack_frame_ia32 *head) |
42 | { | 43 | { |
44 | /* Also check accessibility of one struct frame_head beyond: */ | ||
43 | struct stack_frame_ia32 bufhead[2]; | 45 | struct stack_frame_ia32 bufhead[2]; |
44 | struct stack_frame_ia32 *fp; | 46 | struct stack_frame_ia32 *fp; |
47 | unsigned long bytes; | ||
45 | 48 | ||
46 | /* Also check accessibility of one struct frame_head beyond */ | 49 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); |
47 | if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) | 50 | if (bytes != sizeof(bufhead)) |
48 | return NULL; | ||
49 | if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) | ||
50 | return NULL; | 51 | return NULL; |
51 | 52 | ||
52 | fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); | 53 | fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); |
@@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) | |||
87 | 88 | ||
88 | static struct stack_frame *dump_user_backtrace(struct stack_frame *head) | 89 | static struct stack_frame *dump_user_backtrace(struct stack_frame *head) |
89 | { | 90 | { |
91 | /* Also check accessibility of one struct frame_head beyond: */ | ||
90 | struct stack_frame bufhead[2]; | 92 | struct stack_frame bufhead[2]; |
93 | unsigned long bytes; | ||
91 | 94 | ||
92 | /* Also check accessibility of one struct stack_frame beyond */ | 95 | bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); |
93 | if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) | 96 | if (bytes != sizeof(bufhead)) |
94 | return NULL; | ||
95 | if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) | ||
96 | return NULL; | 97 | return NULL; |
97 | 98 | ||
98 | oprofile_add_trace(bufhead[0].return_address); | 99 | oprofile_add_trace(bufhead[0].return_address); |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cf9750004a08..68894fdc034b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -112,8 +112,10 @@ static void nmi_cpu_start(void *dummy) | |||
112 | static int nmi_start(void) | 112 | static int nmi_start(void) |
113 | { | 113 | { |
114 | get_online_cpus(); | 114 | get_online_cpus(); |
115 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
116 | ctr_running = 1; | 115 | ctr_running = 1; |
116 | /* make ctr_running visible to the nmi handler: */ | ||
117 | smp_mb(); | ||
118 | on_each_cpu(nmi_cpu_start, NULL, 1); | ||
117 | put_online_cpus(); | 119 | put_online_cpus(); |
118 | return 0; | 120 | return 0; |
119 | } | 121 | } |
@@ -504,15 +506,18 @@ static int nmi_setup(void) | |||
504 | 506 | ||
505 | nmi_enabled = 0; | 507 | nmi_enabled = 0; |
506 | ctr_running = 0; | 508 | ctr_running = 0; |
507 | barrier(); | 509 | /* make variables visible to the nmi handler: */ |
510 | smp_mb(); | ||
508 | err = register_die_notifier(&profile_exceptions_nb); | 511 | err = register_die_notifier(&profile_exceptions_nb); |
509 | if (err) | 512 | if (err) |
510 | goto fail; | 513 | goto fail; |
511 | 514 | ||
512 | get_online_cpus(); | 515 | get_online_cpus(); |
513 | register_cpu_notifier(&oprofile_cpu_nb); | 516 | register_cpu_notifier(&oprofile_cpu_nb); |
514 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
515 | nmi_enabled = 1; | 517 | nmi_enabled = 1; |
518 | /* make nmi_enabled visible to the nmi handler: */ | ||
519 | smp_mb(); | ||
520 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
516 | put_online_cpus(); | 521 | put_online_cpus(); |
517 | 522 | ||
518 | return 0; | 523 | return 0; |
@@ -531,7 +536,8 @@ static void nmi_shutdown(void) | |||
531 | nmi_enabled = 0; | 536 | nmi_enabled = 0; |
532 | ctr_running = 0; | 537 | ctr_running = 0; |
533 | put_online_cpus(); | 538 | put_online_cpus(); |
534 | barrier(); | 539 | /* make variables visible to the nmi handler: */ |
540 | smp_mb(); | ||
535 | unregister_die_notifier(&profile_exceptions_nb); | 541 | unregister_die_notifier(&profile_exceptions_nb); |
536 | msrs = &get_cpu_var(cpu_msrs); | 542 | msrs = &get_cpu_var(cpu_msrs); |
537 | model->shutdown(msrs); | 543 | model->shutdown(msrs); |
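In the nmi_int.c hunks above, ctr_running and nmi_enabled are now published before the on_each_cpu() calls, and barrier() becomes smp_mb(): barrier() only constrains the compiler on the local CPU, whereas the NMI handler running on another CPU must actually observe the flag before the IPI-driven setup/start lets NMIs fire there. The publish side of that pattern is sketched below with illustrative names, not the oprofile symbols.

#include <linux/smp.h>

static int sketch_ctr_running;		/* read from the NMI handler */

static void sketch_cpu_start(void *unused)
{
	/* per-CPU counter enable would go here */
}

static void sketch_start(void)
{
	sketch_ctr_running = 1;
	/* full barrier: the flag must be visible before NMIs can fire elsewhere */
	smp_mb();
	on_each_cpu(sketch_cpu_start, NULL, 1);
}

static int sketch_nmi_handler(void)
{
	if (!sketch_ctr_running)
		return 0;	/* not ours: counters are not running yet */
	/* ... service the counter overflow ... */
	return 1;
}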
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 750c346ef50a..301e325992f6 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -519,7 +519,8 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, | |||
519 | if (cfg->address < 0xFFFFFFFF) | 519 | if (cfg->address < 0xFFFFFFFF) |
520 | return 0; | 520 | return 0; |
521 | 521 | ||
522 | if (!strcmp(mcfg->header.oem_id, "SGI")) | 522 | if (!strcmp(mcfg->header.oem_id, "SGI") || |
523 | !strcmp(mcfg->header.oem_id, "SGI2")) | ||
523 | return 0; | 524 | return 0; |
524 | 525 | ||
525 | if (mcfg->header.revision >= 1) { | 526 | if (mcfg->header.revision >= 1) { |
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index fe008309ffec..1017c7bee388 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -1,8 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux | 2 | * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and |
3 | * x86 PCI core to support the Xen PCI Frontend | 3 | * initial domain support. We also handle the DSDT _PRT callbacks for GSI's |
4 | * used in HVM and initial domain mode (PV does not parse ACPI, so it has no | ||
5 | * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and | ||
6 | * concept of GSIs). Under PV we hook under the pnpbios API for IRQs and | ||
4 | * | 7 | * |
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | 8 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> |
9 | * Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||
10 | * Stefano Stabellini <stefano.stabellini@eu.citrix.com> | ||
6 | */ | 11 | */ |
7 | #include <linux/module.h> | 12 | #include <linux/module.h> |
8 | #include <linux/init.h> | 13 | #include <linux/init.h> |
@@ -19,22 +24,53 @@ | |||
19 | #include <xen/events.h> | 24 | #include <xen/events.h> |
20 | #include <asm/xen/pci.h> | 25 | #include <asm/xen/pci.h> |
21 | 26 | ||
27 | static int xen_pcifront_enable_irq(struct pci_dev *dev) | ||
28 | { | ||
29 | int rc; | ||
30 | int share = 1; | ||
31 | int pirq; | ||
32 | u8 gsi; | ||
33 | |||
34 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); | ||
35 | if (rc < 0) { | ||
36 | dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", | ||
37 | rc); | ||
38 | return rc; | ||
39 | } | ||
40 | /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ | ||
41 | pirq = gsi; | ||
42 | |||
43 | if (gsi < NR_IRQS_LEGACY) | ||
44 | share = 0; | ||
45 | |||
46 | rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); | ||
47 | if (rc < 0) { | ||
48 | dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", | ||
49 | gsi, pirq, rc); | ||
50 | return rc; | ||
51 | } | ||
52 | |||
53 | dev->irq = rc; | ||
54 | dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); | ||
55 | return 0; | ||
56 | } | ||
57 | |||
22 | #ifdef CONFIG_ACPI | 58 | #ifdef CONFIG_ACPI |
23 | static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | 59 | static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, |
24 | int trigger, int polarity) | 60 | bool set_pirq) |
25 | { | 61 | { |
26 | int rc, irq; | 62 | int rc, pirq = -1, irq = -1; |
27 | struct physdev_map_pirq map_irq; | 63 | struct physdev_map_pirq map_irq; |
28 | int shareable = 0; | 64 | int shareable = 0; |
29 | char *name; | 65 | char *name; |
30 | 66 | ||
31 | if (!xen_hvm_domain()) | 67 | if (set_pirq) |
32 | return -1; | 68 | pirq = gsi; |
33 | 69 | ||
34 | map_irq.domid = DOMID_SELF; | 70 | map_irq.domid = DOMID_SELF; |
35 | map_irq.type = MAP_PIRQ_TYPE_GSI; | 71 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
36 | map_irq.index = gsi; | 72 | map_irq.index = gsi; |
37 | map_irq.pirq = -1; | 73 | map_irq.pirq = pirq; |
38 | 74 | ||
39 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 75 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
40 | if (rc) { | 76 | if (rc) { |
@@ -42,7 +78,7 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | |||
42 | return -1; | 78 | return -1; |
43 | } | 79 | } |
44 | 80 | ||
45 | if (trigger == ACPI_EDGE_SENSITIVE) { | 81 | if (triggering == ACPI_EDGE_SENSITIVE) { |
46 | shareable = 0; | 82 | shareable = 0; |
47 | name = "ioapic-edge"; | 83 | name = "ioapic-edge"; |
48 | } else { | 84 | } else { |
@@ -50,12 +86,63 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | |||
50 | name = "ioapic-level"; | 86 | name = "ioapic-level"; |
51 | } | 87 | } |
52 | 88 | ||
89 | if (gsi_override >= 0) | ||
90 | gsi = gsi_override; | ||
91 | |||
53 | irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); | 92 | irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); |
93 | if (irq < 0) | ||
94 | goto out; | ||
54 | 95 | ||
55 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); | 96 | printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi); |
97 | out: | ||
98 | return irq; | ||
99 | } | ||
100 | |||
101 | static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | ||
102 | int trigger, int polarity) | ||
103 | { | ||
104 | if (!xen_hvm_domain()) | ||
105 | return -1; | ||
106 | |||
107 | return xen_register_pirq(gsi, -1 /* no GSI override */, trigger, | ||
108 | false /* no mapping of GSI to PIRQ */); | ||
109 | } | ||
110 | |||
111 | #ifdef CONFIG_XEN_DOM0 | ||
112 | static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) | ||
113 | { | ||
114 | int rc, irq; | ||
115 | struct physdev_setup_gsi setup_gsi; | ||
116 | |||
117 | if (!xen_pv_domain()) | ||
118 | return -1; | ||
119 | |||
120 | printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", | ||
121 | gsi, triggering, polarity); | ||
122 | |||
123 | irq = xen_register_pirq(gsi, gsi_override, triggering, true); | ||
124 | |||
125 | setup_gsi.gsi = gsi; | ||
126 | setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); | ||
127 | setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
128 | |||
129 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); | ||
130 | if (rc == -EEXIST) | ||
131 | printk(KERN_INFO "Already setup the GSI :%d\n", gsi); | ||
132 | else if (rc) { | ||
133 | printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", | ||
134 | gsi, rc); | ||
135 | } | ||
56 | 136 | ||
57 | return irq; | 137 | return irq; |
58 | } | 138 | } |
139 | |||
140 | static int acpi_register_gsi_xen(struct device *dev, u32 gsi, | ||
141 | int trigger, int polarity) | ||
142 | { | ||
143 | return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); | ||
144 | } | ||
145 | #endif | ||
59 | #endif | 146 | #endif |
60 | 147 | ||
61 | #if defined(CONFIG_PCI_MSI) | 148 | #if defined(CONFIG_PCI_MSI) |
@@ -65,6 +152,43 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | |||
65 | struct xen_pci_frontend_ops *xen_pci_frontend; | 152 | struct xen_pci_frontend_ops *xen_pci_frontend; |
66 | EXPORT_SYMBOL_GPL(xen_pci_frontend); | 153 | EXPORT_SYMBOL_GPL(xen_pci_frontend); |
67 | 154 | ||
155 | static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | ||
156 | { | ||
157 | int irq, ret, i; | ||
158 | struct msi_desc *msidesc; | ||
159 | int *v; | ||
160 | |||
161 | v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); | ||
162 | if (!v) | ||
163 | return -ENOMEM; | ||
164 | |||
165 | if (type == PCI_CAP_ID_MSIX) | ||
166 | ret = xen_pci_frontend_enable_msix(dev, v, nvec); | ||
167 | else | ||
168 | ret = xen_pci_frontend_enable_msi(dev, v); | ||
169 | if (ret) | ||
170 | goto error; | ||
171 | i = 0; | ||
172 | list_for_each_entry(msidesc, &dev->msi_list, list) { | ||
173 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, | ||
174 | (type == PCI_CAP_ID_MSIX) ? | ||
175 | "pcifront-msi-x" : | ||
176 | "pcifront-msi", | ||
177 | DOMID_SELF); | ||
178 | if (irq < 0) | ||
179 | goto free; | ||
180 | i++; | ||
181 | } | ||
182 | kfree(v); | ||
183 | return 0; | ||
184 | |||
185 | error: | ||
186 | dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); | ||
187 | free: | ||
188 | kfree(v); | ||
189 | return ret; | ||
190 | } | ||
191 | |||
68 | #define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ | 192 | #define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ |
69 | MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) | 193 | MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) |
70 | 194 | ||
@@ -123,67 +247,6 @@ error: | |||
123 | return -ENODEV; | 247 | return -ENODEV; |
124 | } | 248 | } |
125 | 249 | ||
126 | /* | ||
127 | * For MSI interrupts we have to use drivers/xen/event.s functions to | ||
128 | * allocate an irq_desc and setup the right */ | ||
129 | |||
130 | |||
131 | static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | ||
132 | { | ||
133 | int irq, ret, i; | ||
134 | struct msi_desc *msidesc; | ||
135 | int *v; | ||
136 | |||
137 | v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); | ||
138 | if (!v) | ||
139 | return -ENOMEM; | ||
140 | |||
141 | if (type == PCI_CAP_ID_MSIX) | ||
142 | ret = xen_pci_frontend_enable_msix(dev, v, nvec); | ||
143 | else | ||
144 | ret = xen_pci_frontend_enable_msi(dev, v); | ||
145 | if (ret) | ||
146 | goto error; | ||
147 | i = 0; | ||
148 | list_for_each_entry(msidesc, &dev->msi_list, list) { | ||
149 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, | ||
150 | (type == PCI_CAP_ID_MSIX) ? | ||
151 | "pcifront-msi-x" : | ||
152 | "pcifront-msi", | ||
153 | DOMID_SELF); | ||
154 | if (irq < 0) | ||
155 | goto free; | ||
156 | i++; | ||
157 | } | ||
158 | kfree(v); | ||
159 | return 0; | ||
160 | |||
161 | error: | ||
162 | dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); | ||
163 | free: | ||
164 | kfree(v); | ||
165 | return ret; | ||
166 | } | ||
167 | |||
168 | static void xen_teardown_msi_irqs(struct pci_dev *dev) | ||
169 | { | ||
170 | struct msi_desc *msidesc; | ||
171 | |||
172 | msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); | ||
173 | if (msidesc->msi_attrib.is_msix) | ||
174 | xen_pci_frontend_disable_msix(dev); | ||
175 | else | ||
176 | xen_pci_frontend_disable_msi(dev); | ||
177 | |||
178 | /* Free the IRQ's and the msidesc using the generic code. */ | ||
179 | default_teardown_msi_irqs(dev); | ||
180 | } | ||
181 | |||
182 | static void xen_teardown_msi_irq(unsigned int irq) | ||
183 | { | ||
184 | xen_destroy_irq(irq); | ||
185 | } | ||
186 | |||
187 | #ifdef CONFIG_XEN_DOM0 | 250 | #ifdef CONFIG_XEN_DOM0 |
188 | static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 251 | static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
189 | { | 252 | { |
@@ -242,45 +305,28 @@ out: | |||
242 | return ret; | 305 | return ret; |
243 | } | 306 | } |
244 | #endif | 307 | #endif |
245 | #endif | ||
246 | 308 | ||
247 | static int xen_pcifront_enable_irq(struct pci_dev *dev) | 309 | static void xen_teardown_msi_irqs(struct pci_dev *dev) |
248 | { | 310 | { |
249 | int rc; | 311 | struct msi_desc *msidesc; |
250 | int share = 1; | ||
251 | int pirq; | ||
252 | u8 gsi; | ||
253 | |||
254 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); | ||
255 | if (rc < 0) { | ||
256 | dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", | ||
257 | rc); | ||
258 | return rc; | ||
259 | } | ||
260 | |||
261 | rc = xen_allocate_pirq_gsi(gsi); | ||
262 | if (rc < 0) { | ||
263 | dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n", | ||
264 | gsi, rc); | ||
265 | return rc; | ||
266 | } | ||
267 | pirq = rc; | ||
268 | 312 | ||
269 | if (gsi < NR_IRQS_LEGACY) | 313 | msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); |
270 | share = 0; | 314 | if (msidesc->msi_attrib.is_msix) |
315 | xen_pci_frontend_disable_msix(dev); | ||
316 | else | ||
317 | xen_pci_frontend_disable_msi(dev); | ||
271 | 318 | ||
272 | rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); | 319 | /* Free the IRQ's and the msidesc using the generic code. */ |
273 | if (rc < 0) { | 320 | default_teardown_msi_irqs(dev); |
274 | dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", | 321 | } |
275 | gsi, pirq, rc); | ||
276 | return rc; | ||
277 | } | ||
278 | 322 | ||
279 | dev->irq = rc; | 323 | static void xen_teardown_msi_irq(unsigned int irq) |
280 | dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); | 324 | { |
281 | return 0; | 325 | xen_destroy_irq(irq); |
282 | } | 326 | } |
283 | 327 | ||
328 | #endif | ||
329 | |||
284 | int __init pci_xen_init(void) | 330 | int __init pci_xen_init(void) |
285 | { | 331 | { |
286 | if (!xen_pv_domain() || xen_initial_domain()) | 332 | if (!xen_pv_domain() || xen_initial_domain()) |
@@ -327,104 +373,13 @@ int __init pci_xen_hvm_init(void) | |||
327 | } | 373 | } |
328 | 374 | ||
329 | #ifdef CONFIG_XEN_DOM0 | 375 | #ifdef CONFIG_XEN_DOM0 |
330 | static int xen_register_pirq(u32 gsi, int triggering) | ||
331 | { | ||
332 | int rc, pirq, irq = -1; | ||
333 | struct physdev_map_pirq map_irq; | ||
334 | int shareable = 0; | ||
335 | char *name; | ||
336 | bool gsi_override = false; | ||
337 | |||
338 | if (!xen_pv_domain()) | ||
339 | return -1; | ||
340 | |||
341 | if (triggering == ACPI_EDGE_SENSITIVE) { | ||
342 | shareable = 0; | ||
343 | name = "ioapic-edge"; | ||
344 | } else { | ||
345 | shareable = 1; | ||
346 | name = "ioapic-level"; | ||
347 | } | ||
348 | |||
349 | pirq = xen_allocate_pirq_gsi(gsi); | ||
350 | if (pirq < 0) | ||
351 | goto out; | ||
352 | |||
353 | /* Before we bind the GSI to a Linux IRQ, check whether | ||
354 | * we need to override it with bus_irq (IRQ) value. Usually for | ||
355 | * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: | ||
356 | * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) | ||
357 | * but there are oddballs where the IRQ != GSI: | ||
358 | * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) | ||
359 | * which ends up being: gsi_to_irq[9] == 20 | ||
360 | * (which is what acpi_gsi_to_irq ends up calling when starting the | ||
361 | * the ACPI interpreter and keels over since IRQ 9 has not been | ||
362 | * setup as we had setup IRQ 20 for it). | ||
363 | */ | ||
364 | if (gsi == acpi_sci_override_gsi) { | ||
365 | /* Check whether the GSI != IRQ */ | ||
366 | acpi_gsi_to_irq(gsi, &irq); | ||
367 | if (irq != gsi) | ||
368 | /* Bugger, we MUST have that IRQ. */ | ||
369 | gsi_override = true; | ||
370 | } | ||
371 | if (gsi_override) | ||
372 | irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name); | ||
373 | else | ||
374 | irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); | ||
375 | if (irq < 0) | ||
376 | goto out; | ||
377 | |||
378 | printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi); | ||
379 | |||
380 | map_irq.domid = DOMID_SELF; | ||
381 | map_irq.type = MAP_PIRQ_TYPE_GSI; | ||
382 | map_irq.index = gsi; | ||
383 | map_irq.pirq = pirq; | ||
384 | |||
385 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | ||
386 | if (rc) { | ||
387 | printk(KERN_WARNING "xen map irq failed %d\n", rc); | ||
388 | return -1; | ||
389 | } | ||
390 | |||
391 | out: | ||
392 | return irq; | ||
393 | } | ||
394 | |||
395 | static int xen_register_gsi(u32 gsi, int triggering, int polarity) | ||
396 | { | ||
397 | int rc, irq; | ||
398 | struct physdev_setup_gsi setup_gsi; | ||
399 | |||
400 | if (!xen_pv_domain()) | ||
401 | return -1; | ||
402 | |||
403 | printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", | ||
404 | gsi, triggering, polarity); | ||
405 | |||
406 | irq = xen_register_pirq(gsi, triggering); | ||
407 | |||
408 | setup_gsi.gsi = gsi; | ||
409 | setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); | ||
410 | setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
411 | |||
412 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); | ||
413 | if (rc == -EEXIST) | ||
414 | printk(KERN_INFO "Already setup the GSI :%d\n", gsi); | ||
415 | else if (rc) { | ||
416 | printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", | ||
417 | gsi, rc); | ||
418 | } | ||
419 | |||
420 | return irq; | ||
421 | } | ||
422 | |||
423 | static __init void xen_setup_acpi_sci(void) | 376 | static __init void xen_setup_acpi_sci(void) |
424 | { | 377 | { |
425 | int rc; | 378 | int rc; |
426 | int trigger, polarity; | 379 | int trigger, polarity; |
427 | int gsi = acpi_sci_override_gsi; | 380 | int gsi = acpi_sci_override_gsi; |
381 | int irq = -1; | ||
382 | int gsi_override = -1; | ||
428 | 383 | ||
429 | if (!gsi) | 384 | if (!gsi) |
430 | return; | 385 | return; |
@@ -437,51 +392,43 @@ static __init void xen_setup_acpi_sci(void) | |||
437 | } | 392 | } |
438 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; | 393 | trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; |
439 | polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; | 394 | polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; |
440 | 395 | ||
441 | printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " | 396 | printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " |
442 | "polarity=%d\n", gsi, trigger, polarity); | 397 | "polarity=%d\n", gsi, trigger, polarity); |
443 | 398 | ||
444 | gsi = xen_register_gsi(gsi, trigger, polarity); | 399 | /* Before we bind the GSI to a Linux IRQ, check whether |
400 | * we need to override it with bus_irq (IRQ) value. Usually for | ||
401 | * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: | ||
402 | * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) | ||
403 | * but there are oddballs where the IRQ != GSI: | ||
404 | * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) | ||
405 | * which ends up being: gsi_to_irq[9] == 20 | ||
406 | * (which is what acpi_gsi_to_irq ends up calling when starting | ||
407 | * the ACPI interpreter and keels over since IRQ 9 has not been | ||
408 | * setup as we had setup IRQ 20 for it). | ||
409 | */ | ||
410 | if (acpi_gsi_to_irq(gsi, &irq) == 0) { | ||
411 | /* Use the provided value if it's valid. */ | ||
412 | if (irq >= 0) | ||
413 | gsi_override = irq; | ||
414 | } | ||
415 | |||
416 | gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); | ||
445 | printk(KERN_INFO "xen: acpi sci %d\n", gsi); | 417 | printk(KERN_INFO "xen: acpi sci %d\n", gsi); |
446 | 418 | ||
447 | return; | 419 | return; |
448 | } | 420 | } |
449 | 421 | ||
450 | static int acpi_register_gsi_xen(struct device *dev, u32 gsi, | 422 | int __init pci_xen_initial_domain(void) |
451 | int trigger, int polarity) | ||
452 | { | 423 | { |
453 | return xen_register_gsi(gsi, trigger, polarity); | 424 | int irq; |
454 | } | ||
455 | 425 | ||
456 | static int __init pci_xen_initial_domain(void) | ||
457 | { | ||
458 | #ifdef CONFIG_PCI_MSI | 426 | #ifdef CONFIG_PCI_MSI |
459 | x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; | 427 | x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; |
460 | x86_msi.teardown_msi_irq = xen_teardown_msi_irq; | 428 | x86_msi.teardown_msi_irq = xen_teardown_msi_irq; |
461 | #endif | 429 | #endif |
462 | xen_setup_acpi_sci(); | 430 | xen_setup_acpi_sci(); |
463 | __acpi_register_gsi = acpi_register_gsi_xen; | 431 | __acpi_register_gsi = acpi_register_gsi_xen; |
464 | |||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | void __init xen_setup_pirqs(void) | ||
469 | { | ||
470 | int pirq, irq; | ||
471 | |||
472 | pci_xen_initial_domain(); | ||
473 | |||
474 | if (0 == nr_ioapics) { | ||
475 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { | ||
476 | pirq = xen_allocate_pirq_gsi(irq); | ||
477 | if (WARN(pirq < 0, | ||
478 | "Could not allocate PIRQ for legacy interrupt\n")) | ||
479 | break; | ||
480 | irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic"); | ||
481 | } | ||
482 | return; | ||
483 | } | ||
484 | |||
485 | /* Pre-allocate legacy irqs */ | 432 | /* Pre-allocate legacy irqs */ |
486 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { | 433 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { |
487 | int trigger, polarity; | 434 | int trigger, polarity; |
@@ -489,13 +436,17 @@ void __init xen_setup_pirqs(void) | |||
489 | if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) | 436 | if (acpi_get_override_irq(irq, &trigger, &polarity) == -1) |
490 | continue; | 437 | continue; |
491 | 438 | ||
492 | xen_register_pirq(irq, | 439 | xen_register_pirq(irq, -1 /* no GSI override */, |
493 | trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE); | 440 | trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE, |
441 | true /* Map GSI to PIRQ */); | ||
494 | } | 442 | } |
443 | if (0 == nr_ioapics) { | ||
444 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) | ||
445 | xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic"); | ||
446 | } | ||
447 | return 0; | ||
495 | } | 448 | } |
496 | #endif | ||
497 | 449 | ||
498 | #ifdef CONFIG_XEN_DOM0 | ||
499 | struct xen_device_domain_owner { | 450 | struct xen_device_domain_owner { |
500 | domid_t domain; | 451 | domid_t domain; |
501 | struct pci_dev *dev; | 452 | struct pci_dev *dev; |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 474356b98ede..3ae4128013e6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -51,7 +51,17 @@ | |||
51 | int efi_enabled; | 51 | int efi_enabled; |
52 | EXPORT_SYMBOL(efi_enabled); | 52 | EXPORT_SYMBOL(efi_enabled); |
53 | 53 | ||
54 | struct efi efi; | 54 | struct efi __read_mostly efi = { |
55 | .mps = EFI_INVALID_TABLE_ADDR, | ||
56 | .acpi = EFI_INVALID_TABLE_ADDR, | ||
57 | .acpi20 = EFI_INVALID_TABLE_ADDR, | ||
58 | .smbios = EFI_INVALID_TABLE_ADDR, | ||
59 | .sal_systab = EFI_INVALID_TABLE_ADDR, | ||
60 | .boot_info = EFI_INVALID_TABLE_ADDR, | ||
61 | .hcdp = EFI_INVALID_TABLE_ADDR, | ||
62 | .uga = EFI_INVALID_TABLE_ADDR, | ||
63 | .uv_systab = EFI_INVALID_TABLE_ADDR, | ||
64 | }; | ||
55 | EXPORT_SYMBOL(efi); | 65 | EXPORT_SYMBOL(efi); |
56 | 66 | ||
57 | struct efi_memory_map memmap; | 67 | struct efi_memory_map memmap; |
@@ -79,26 +89,50 @@ early_param("add_efi_memmap", setup_add_efi_memmap); | |||
79 | 89 | ||
80 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | 90 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) |
81 | { | 91 | { |
82 | return efi_call_virt2(get_time, tm, tc); | 92 | unsigned long flags; |
93 | efi_status_t status; | ||
94 | |||
95 | spin_lock_irqsave(&rtc_lock, flags); | ||
96 | status = efi_call_virt2(get_time, tm, tc); | ||
97 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
98 | return status; | ||
83 | } | 99 | } |
84 | 100 | ||
85 | static efi_status_t virt_efi_set_time(efi_time_t *tm) | 101 | static efi_status_t virt_efi_set_time(efi_time_t *tm) |
86 | { | 102 | { |
87 | return efi_call_virt1(set_time, tm); | 103 | unsigned long flags; |
104 | efi_status_t status; | ||
105 | |||
106 | spin_lock_irqsave(&rtc_lock, flags); | ||
107 | status = efi_call_virt1(set_time, tm); | ||
108 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
109 | return status; | ||
88 | } | 110 | } |
89 | 111 | ||
90 | static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, | 112 | static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, |
91 | efi_bool_t *pending, | 113 | efi_bool_t *pending, |
92 | efi_time_t *tm) | 114 | efi_time_t *tm) |
93 | { | 115 | { |
94 | return efi_call_virt3(get_wakeup_time, | 116 | unsigned long flags; |
95 | enabled, pending, tm); | 117 | efi_status_t status; |
118 | |||
119 | spin_lock_irqsave(&rtc_lock, flags); | ||
120 | status = efi_call_virt3(get_wakeup_time, | ||
121 | enabled, pending, tm); | ||
122 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
123 | return status; | ||
96 | } | 124 | } |
97 | 125 | ||
98 | static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) | 126 | static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) |
99 | { | 127 | { |
100 | return efi_call_virt2(set_wakeup_time, | 128 | unsigned long flags; |
101 | enabled, tm); | 129 | efi_status_t status; |
130 | |||
131 | spin_lock_irqsave(&rtc_lock, flags); | ||
132 | status = efi_call_virt2(set_wakeup_time, | ||
133 | enabled, tm); | ||
134 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
135 | return status; | ||
102 | } | 136 | } |
103 | 137 | ||
104 | static efi_status_t virt_efi_get_variable(efi_char16_t *name, | 138 | static efi_status_t virt_efi_get_variable(efi_char16_t *name, |
@@ -122,7 +156,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, | |||
122 | 156 | ||
123 | static efi_status_t virt_efi_set_variable(efi_char16_t *name, | 157 | static efi_status_t virt_efi_set_variable(efi_char16_t *name, |
124 | efi_guid_t *vendor, | 158 | efi_guid_t *vendor, |
125 | unsigned long attr, | 159 | u32 attr, |
126 | unsigned long data_size, | 160 | unsigned long data_size, |
127 | void *data) | 161 | void *data) |
128 | { | 162 | { |
@@ -131,6 +165,18 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, | |||
131 | data_size, data); | 165 | data_size, data); |
132 | } | 166 | } |
133 | 167 | ||
168 | static efi_status_t virt_efi_query_variable_info(u32 attr, | ||
169 | u64 *storage_space, | ||
170 | u64 *remaining_space, | ||
171 | u64 *max_variable_size) | ||
172 | { | ||
173 | if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) | ||
174 | return EFI_UNSUPPORTED; | ||
175 | |||
176 | return efi_call_virt4(query_variable_info, attr, storage_space, | ||
177 | remaining_space, max_variable_size); | ||
178 | } | ||
179 | |||
134 | static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) | 180 | static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) |
135 | { | 181 | { |
136 | return efi_call_virt1(get_next_high_mono_count, count); | 182 | return efi_call_virt1(get_next_high_mono_count, count); |
@@ -145,6 +191,28 @@ static void virt_efi_reset_system(int reset_type, | |||
145 | data_size, data); | 191 | data_size, data); |
146 | } | 192 | } |
147 | 193 | ||
194 | static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, | ||
195 | unsigned long count, | ||
196 | unsigned long sg_list) | ||
197 | { | ||
198 | if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) | ||
199 | return EFI_UNSUPPORTED; | ||
200 | |||
201 | return efi_call_virt3(update_capsule, capsules, count, sg_list); | ||
202 | } | ||
203 | |||
204 | static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, | ||
205 | unsigned long count, | ||
206 | u64 *max_size, | ||
207 | int *reset_type) | ||
208 | { | ||
209 | if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) | ||
210 | return EFI_UNSUPPORTED; | ||
211 | |||
212 | return efi_call_virt4(query_capsule_caps, capsules, count, max_size, | ||
213 | reset_type); | ||
214 | } | ||
215 | |||
148 | static efi_status_t __init phys_efi_set_virtual_address_map( | 216 | static efi_status_t __init phys_efi_set_virtual_address_map( |
149 | unsigned long memory_map_size, | 217 | unsigned long memory_map_size, |
150 | unsigned long descriptor_size, | 218 | unsigned long descriptor_size, |
@@ -164,11 +232,14 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
164 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | 232 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, |
165 | efi_time_cap_t *tc) | 233 | efi_time_cap_t *tc) |
166 | { | 234 | { |
235 | unsigned long flags; | ||
167 | efi_status_t status; | 236 | efi_status_t status; |
168 | 237 | ||
238 | spin_lock_irqsave(&rtc_lock, flags); | ||
169 | efi_call_phys_prelog(); | 239 | efi_call_phys_prelog(); |
170 | status = efi_call_phys2(efi_phys.get_time, tm, tc); | 240 | status = efi_call_phys2(efi_phys.get_time, tm, tc); |
171 | efi_call_phys_epilog(); | 241 | efi_call_phys_epilog(); |
242 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
172 | return status; | 243 | return status; |
173 | } | 244 | } |
174 | 245 | ||
@@ -504,9 +575,6 @@ void __init efi_init(void) | |||
504 | x86_platform.set_wallclock = efi_set_rtc_mmss; | 575 | x86_platform.set_wallclock = efi_set_rtc_mmss; |
505 | #endif | 576 | #endif |
506 | 577 | ||
507 | /* Setup for EFI runtime service */ | ||
508 | reboot_type = BOOT_EFI; | ||
509 | |||
510 | #if EFI_DEBUG | 578 | #if EFI_DEBUG |
511 | print_efi_memmap(); | 579 | print_efi_memmap(); |
512 | #endif | 580 | #endif |
@@ -672,6 +740,9 @@ void __init efi_enter_virtual_mode(void) | |||
672 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | 740 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; |
673 | efi.reset_system = virt_efi_reset_system; | 741 | efi.reset_system = virt_efi_reset_system; |
674 | efi.set_virtual_address_map = NULL; | 742 | efi.set_virtual_address_map = NULL; |
743 | efi.query_variable_info = virt_efi_query_variable_info; | ||
744 | efi.update_capsule = virt_efi_update_capsule; | ||
745 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | ||
675 | if (__supported_pte_mask & _PAGE_NX) | 746 | if (__supported_pte_mask & _PAGE_NX) |
676 | runtime_code_page_mkexec(); | 747 | runtime_code_page_mkexec(); |
677 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); | 748 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); |
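The three new wrappers above (virt_efi_query_variable_info, virt_efi_update_capsule, virt_efi_query_capsule_caps) all gate on the firmware's runtime revision: anything older than a 2.00 system table gets EFI_UNSUPPORTED instead of a call into services it never implemented. A minimal userspace sketch of that gating pattern, with made-up status codes and a stand-in service table rather than the kernel's efi structures:

#include <stdint.h>
#include <stdio.h>

#define EFI_SUCCESS      0u
#define EFI_UNSUPPORTED  3u                  /* illustrative status codes */
#define EFI_2_00_REVISION ((2u << 16) | 0)   /* (major << 16) | minor */

struct fw_runtime {
	uint32_t revision;
	unsigned (*query_variable_info)(uint32_t attr, uint64_t *storage,
					uint64_t *remaining, uint64_t *max_var);
};

/* Wrapper: only forward the call if the firmware is new enough. */
static unsigned query_variable_info(struct fw_runtime *rt, uint32_t attr,
				    uint64_t *storage, uint64_t *remaining,
				    uint64_t *max_var)
{
	if (rt->revision < EFI_2_00_REVISION)
		return EFI_UNSUPPORTED;
	return rt->query_variable_info(attr, storage, remaining, max_var);
}

static unsigned fake_qvi(uint32_t attr, uint64_t *s, uint64_t *r, uint64_t *m)
{
	*s = 1 << 20; *r = 512 << 10; *m = 1024;
	return EFI_SUCCESS;
}

int main(void)
{
	struct fw_runtime old = { .revision = (1u << 16) | 10 };
	struct fw_runtime new = { .revision = EFI_2_00_REVISION,
				  .query_variable_info = fake_qvi };
	uint64_t s, r, m;

	printf("1.10 firmware: %u\n", query_variable_info(&old, 0, &s, &r, &m));
	printf("2.00 firmware: %u\n", query_variable_info(&new, 0, &s, &r, &m));
	return 0;
}

The rtc_lock taken around phys_efi_get_time() is a separate fix: the EFI time services and the CMOS RTC driver touch the same hardware, so both paths serialize on the same spinlock.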
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 68e467f69fec..db8b915f54bc 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -296,14 +296,18 @@ static void bau_process_message(struct msg_desc *mdp, | |||
296 | } | 296 | } |
297 | 297 | ||
298 | /* | 298 | /* |
299 | * Determine the first cpu on a uvhub. | 299 | * Determine the first cpu on a pnode. |
300 | */ | 300 | */ |
301 | static int uvhub_to_first_cpu(int uvhub) | 301 | static int pnode_to_first_cpu(int pnode, struct bau_control *smaster) |
302 | { | 302 | { |
303 | int cpu; | 303 | int cpu; |
304 | for_each_present_cpu(cpu) | 304 | struct hub_and_pnode *hpp; |
305 | if (uvhub == uv_cpu_to_blade_id(cpu)) | 305 | |
306 | for_each_present_cpu(cpu) { | ||
307 | hpp = &smaster->thp[cpu]; | ||
308 | if (pnode == hpp->pnode) | ||
306 | return cpu; | 309 | return cpu; |
310 | } | ||
307 | return -1; | 311 | return -1; |
308 | } | 312 | } |
309 | 313 | ||
@@ -366,28 +370,32 @@ static void do_reset(void *ptr) | |||
366 | * Use IPI to get all target uvhubs to release resources held by | 370 | * Use IPI to get all target uvhubs to release resources held by |
367 | * a given sending cpu number. | 371 | * a given sending cpu number. |
368 | */ | 372 | */ |
369 | static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender) | 373 | static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) |
370 | { | 374 | { |
371 | int uvhub; | 375 | int pnode; |
376 | int apnode; | ||
372 | int maskbits; | 377 | int maskbits; |
373 | cpumask_t mask; | 378 | int sender = bcp->cpu; |
379 | cpumask_t *mask = bcp->uvhub_master->cpumask; | ||
380 | struct bau_control *smaster = bcp->socket_master; | ||
374 | struct reset_args reset_args; | 381 | struct reset_args reset_args; |
375 | 382 | ||
376 | reset_args.sender = sender; | 383 | reset_args.sender = sender; |
377 | cpus_clear(mask); | 384 | cpus_clear(*mask); |
378 | /* find a single cpu for each uvhub in this distribution mask */ | 385 | /* find a single cpu for each uvhub in this distribution mask */ |
379 | maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE; | 386 | maskbits = sizeof(struct pnmask) * BITSPERBYTE; |
380 | for (uvhub = 0; uvhub < maskbits; uvhub++) { | 387 | /* each bit is a pnode relative to the partition base pnode */ |
388 | for (pnode = 0; pnode < maskbits; pnode++) { | ||
381 | int cpu; | 389 | int cpu; |
382 | if (!bau_uvhub_isset(uvhub, distribution)) | 390 | if (!bau_uvhub_isset(pnode, distribution)) |
383 | continue; | 391 | continue; |
384 | /* find a cpu for this uvhub */ | 392 | apnode = pnode + bcp->partition_base_pnode; |
385 | cpu = uvhub_to_first_cpu(uvhub); | 393 | cpu = pnode_to_first_cpu(apnode, smaster); |
386 | cpu_set(cpu, mask); | 394 | cpu_set(cpu, *mask); |
387 | } | 395 | } |
388 | 396 | ||
389 | /* IPI all cpus; preemption is already disabled */ | 397 | /* IPI all cpus; preemption is already disabled */ |
390 | smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1); | 398 | smp_call_function_many(mask, do_reset, (void *)&reset_args, 1); |
391 | return; | 399 | return; |
392 | } | 400 | } |
393 | 401 | ||
@@ -604,7 +612,7 @@ static void destination_plugged(struct bau_desc *bau_desc, | |||
604 | quiesce_local_uvhub(hmaster); | 612 | quiesce_local_uvhub(hmaster); |
605 | 613 | ||
606 | spin_lock(&hmaster->queue_lock); | 614 | spin_lock(&hmaster->queue_lock); |
607 | reset_with_ipi(&bau_desc->distribution, bcp->cpu); | 615 | reset_with_ipi(&bau_desc->distribution, bcp); |
608 | spin_unlock(&hmaster->queue_lock); | 616 | spin_unlock(&hmaster->queue_lock); |
609 | 617 | ||
610 | end_uvhub_quiesce(hmaster); | 618 | end_uvhub_quiesce(hmaster); |
@@ -626,7 +634,7 @@ static void destination_timeout(struct bau_desc *bau_desc, | |||
626 | quiesce_local_uvhub(hmaster); | 634 | quiesce_local_uvhub(hmaster); |
627 | 635 | ||
628 | spin_lock(&hmaster->queue_lock); | 636 | spin_lock(&hmaster->queue_lock); |
629 | reset_with_ipi(&bau_desc->distribution, bcp->cpu); | 637 | reset_with_ipi(&bau_desc->distribution, bcp); |
630 | spin_unlock(&hmaster->queue_lock); | 638 | spin_unlock(&hmaster->queue_lock); |
631 | 639 | ||
632 | end_uvhub_quiesce(hmaster); | 640 | end_uvhub_quiesce(hmaster); |
@@ -1334,9 +1342,10 @@ static ssize_t tunables_write(struct file *file, const char __user *user, | |||
1334 | 1342 | ||
1335 | instr[count] = '\0'; | 1343 | instr[count] = '\0'; |
1336 | 1344 | ||
1337 | bcp = &per_cpu(bau_control, smp_processor_id()); | 1345 | cpu = get_cpu(); |
1338 | 1346 | bcp = &per_cpu(bau_control, cpu); | |
1339 | ret = parse_tunables_write(bcp, instr, count); | 1347 | ret = parse_tunables_write(bcp, instr, count); |
1348 | put_cpu(); | ||
1340 | if (ret) | 1349 | if (ret) |
1341 | return ret; | 1350 | return ret; |
1342 | 1351 | ||
@@ -1687,6 +1696,16 @@ static void make_per_cpu_thp(struct bau_control *smaster) | |||
1687 | } | 1696 | } |
1688 | 1697 | ||
1689 | /* | 1698 | /* |
1699 | * Each uvhub is to get a local cpumask. | ||
1700 | */ | ||
1701 | static void make_per_hub_cpumask(struct bau_control *hmaster) | ||
1702 | { | ||
1703 | int sz = sizeof(cpumask_t); | ||
1704 | |||
1705 | hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode); | ||
1706 | } | ||
1707 | |||
1708 | /* | ||
1690 | * Initialize all the per_cpu information for the cpus on a given socket, | 1709 | * Initialize all the per_cpu information for the cpus on a given socket, |
1691 | * given what has been gathered into the socket_desc struct. | 1710 | * given what has been gathered into the socket_desc struct. |
1692 | * And reports the chosen hub and socket masters back to the caller. | 1711 | * And reports the chosen hub and socket masters back to the caller. |
@@ -1751,11 +1770,12 @@ static int __init summarize_uvhub_sockets(int nuvhubs, | |||
1751 | sdp = &bdp->socket[socket]; | 1770 | sdp = &bdp->socket[socket]; |
1752 | if (scan_sock(sdp, bdp, &smaster, &hmaster)) | 1771 | if (scan_sock(sdp, bdp, &smaster, &hmaster)) |
1753 | return 1; | 1772 | return 1; |
1773 | make_per_cpu_thp(smaster); | ||
1754 | } | 1774 | } |
1755 | socket++; | 1775 | socket++; |
1756 | socket_mask = (socket_mask >> 1); | 1776 | socket_mask = (socket_mask >> 1); |
1757 | make_per_cpu_thp(smaster); | ||
1758 | } | 1777 | } |
1778 | make_per_hub_cpumask(hmaster); | ||
1759 | } | 1779 | } |
1760 | return 0; | 1780 | return 0; |
1761 | } | 1781 | } |
@@ -1777,15 +1797,20 @@ static int __init init_per_cpu(int nuvhubs, int base_part_pnode) | |||
1777 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); | 1797 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); |
1778 | 1798 | ||
1779 | if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) | 1799 | if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) |
1780 | return 1; | 1800 | goto fail; |
1781 | 1801 | ||
1782 | if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) | 1802 | if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) |
1783 | return 1; | 1803 | goto fail; |
1784 | 1804 | ||
1785 | kfree(uvhub_descs); | 1805 | kfree(uvhub_descs); |
1786 | kfree(uvhub_mask); | 1806 | kfree(uvhub_mask); |
1787 | init_per_cpu_tunables(); | 1807 | init_per_cpu_tunables(); |
1788 | return 0; | 1808 | return 0; |
1809 | |||
1810 | fail: | ||
1811 | kfree(uvhub_descs); | ||
1812 | kfree(uvhub_mask); | ||
1813 | return 1; | ||
1789 | } | 1814 | } |
1790 | 1815 | ||
1791 | /* | 1816 | /* |
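reset_with_ipi() now walks the distribution mask one pnode bit at a time, converts each relative bit to an absolute pnode, and uses the per-cpu hub_and_pnode table to pick a single representative cpu for it. A rough userspace sketch of that one-representative-per-group selection; the group numbering and the cpu-to-group table are invented for illustration:

#include <stdio.h>

#define NR_CPUS   16
#define NR_GROUPS  8

/* cpu_to_group[] plays the role of the per-cpu thp[] hub_and_pnode table. */
static const int cpu_to_group[NR_CPUS] = {
	0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
};

static int group_to_first_cpu(int group)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_to_group[cpu] == group)
			return cpu;
	return -1;
}

int main(void)
{
	unsigned long distribution = 0x5;	/* groups 0 and 2 targeted */
	unsigned long targets = 0;		/* stands in for the cpumask */
	int group;

	for (group = 0; group < NR_GROUPS; group++) {
		if (!(distribution & (1UL << group)))
			continue;
		int cpu = group_to_first_cpu(group);
		if (cpu >= 0)
			targets |= 1UL << cpu;
	}
	printf("representative cpu mask: 0x%lx\n", targets);
	return 0;
}

The kernel version also stops building the mask on the stack: make_per_hub_cpumask() gives every uvhub master a kzalloc'd cpumask_t, which avoids a potentially large on-stack cpumask_t when NR_CPUS is big.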
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index bef0bc962400..5d179502a52c 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -26,6 +26,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) | |||
26 | export CPPFLAGS_vdso.lds += -P -C | 26 | export CPPFLAGS_vdso.lds += -P -C |
27 | 27 | ||
28 | VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ | 28 | VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ |
29 | -Wl,--no-undefined \ | ||
29 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 | 30 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 |
30 | 31 | ||
31 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so | 32 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index a724905fdae7..6bc0e723b6e8 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * | 6 | * |
7 | * The code should have no internal unresolved relocations. | 7 | * The code should have no internal unresolved relocations. |
8 | * Check with readelf after changing. | 8 | * Check with readelf after changing. |
9 | * Also alternative() doesn't work. | ||
10 | */ | 9 | */ |
11 | 10 | ||
12 | /* Disable profiling for userspace code: */ | 11 | /* Disable profiling for userspace code: */ |
@@ -17,6 +16,7 @@ | |||
17 | #include <linux/time.h> | 16 | #include <linux/time.h> |
18 | #include <linux/string.h> | 17 | #include <linux/string.h> |
19 | #include <asm/vsyscall.h> | 18 | #include <asm/vsyscall.h> |
19 | #include <asm/fixmap.h> | ||
20 | #include <asm/vgtod.h> | 20 | #include <asm/vgtod.h> |
21 | #include <asm/timex.h> | 21 | #include <asm/timex.h> |
22 | #include <asm/hpet.h> | 22 | #include <asm/hpet.h> |
@@ -25,6 +25,43 @@ | |||
25 | 25 | ||
26 | #define gtod (&VVAR(vsyscall_gtod_data)) | 26 | #define gtod (&VVAR(vsyscall_gtod_data)) |
27 | 27 | ||
28 | notrace static cycle_t vread_tsc(void) | ||
29 | { | ||
30 | cycle_t ret; | ||
31 | u64 last; | ||
32 | |||
33 | /* | ||
34 | * Empirically, a fence (of type that depends on the CPU) | ||
35 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
36 | * with respect to loads. The various CPU manuals are unclear | ||
37 | * as to whether rdtsc can be reordered with later loads, | ||
38 | * but no one has ever seen it happen. | ||
39 | */ | ||
40 | rdtsc_barrier(); | ||
41 | ret = (cycle_t)vget_cycles(); | ||
42 | |||
43 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
44 | |||
45 | if (likely(ret >= last)) | ||
46 | return ret; | ||
47 | |||
48 | /* | ||
49 | * GCC likes to generate cmov here, but this branch is extremely | ||
50 | * predictable (it's just a function of time and the likely is | ||
51 | * very likely) and there's a data dependence, so force GCC | ||
52 | * to generate a branch instead. I don't barrier() because | ||
53 | * we don't actually need a barrier, and if this function | ||
54 | * ever gets inlined it will generate worse code. | ||
55 | */ | ||
56 | asm volatile (""); | ||
57 | return last; | ||
58 | } | ||
59 | |||
60 | static notrace cycle_t vread_hpet(void) | ||
61 | { | ||
62 | return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); | ||
63 | } | ||
64 | |||
28 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | 65 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
29 | { | 66 | { |
30 | long ret; | 67 | long ret; |
@@ -36,9 +73,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | |||
36 | notrace static inline long vgetns(void) | 73 | notrace static inline long vgetns(void) |
37 | { | 74 | { |
38 | long v; | 75 | long v; |
39 | cycles_t (*vread)(void); | 76 | cycles_t cycles; |
40 | vread = gtod->clock.vread; | 77 | if (gtod->clock.vclock_mode == VCLOCK_TSC) |
41 | v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; | 78 | cycles = vread_tsc(); |
79 | else | ||
80 | cycles = vread_hpet(); | ||
81 | v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; | ||
42 | return (v * gtod->clock.mult) >> gtod->clock.shift; | 82 | return (v * gtod->clock.mult) >> gtod->clock.shift; |
43 | } | 83 | } |
44 | 84 | ||
@@ -116,21 +156,21 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) | |||
116 | 156 | ||
117 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | 157 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
118 | { | 158 | { |
119 | if (likely(gtod->sysctl_enabled)) | 159 | switch (clock) { |
120 | switch (clock) { | 160 | case CLOCK_REALTIME: |
121 | case CLOCK_REALTIME: | 161 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) |
122 | if (likely(gtod->clock.vread)) | 162 | return do_realtime(ts); |
123 | return do_realtime(ts); | 163 | break; |
124 | break; | 164 | case CLOCK_MONOTONIC: |
125 | case CLOCK_MONOTONIC: | 165 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) |
126 | if (likely(gtod->clock.vread)) | 166 | return do_monotonic(ts); |
127 | return do_monotonic(ts); | 167 | break; |
128 | break; | 168 | case CLOCK_REALTIME_COARSE: |
129 | case CLOCK_REALTIME_COARSE: | 169 | return do_realtime_coarse(ts); |
130 | return do_realtime_coarse(ts); | 170 | case CLOCK_MONOTONIC_COARSE: |
131 | case CLOCK_MONOTONIC_COARSE: | 171 | return do_monotonic_coarse(ts); |
132 | return do_monotonic_coarse(ts); | 172 | } |
133 | } | 173 | |
134 | return vdso_fallback_gettime(clock, ts); | 174 | return vdso_fallback_gettime(clock, ts); |
135 | } | 175 | } |
136 | int clock_gettime(clockid_t, struct timespec *) | 176 | int clock_gettime(clockid_t, struct timespec *) |
@@ -139,7 +179,7 @@ int clock_gettime(clockid_t, struct timespec *) | |||
139 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | 179 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
140 | { | 180 | { |
141 | long ret; | 181 | long ret; |
142 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | 182 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { |
143 | if (likely(tv != NULL)) { | 183 | if (likely(tv != NULL)) { |
144 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | 184 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != |
145 | offsetof(struct timespec, tv_nsec) || | 185 | offsetof(struct timespec, tv_nsec) || |
@@ -161,27 +201,14 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | |||
161 | int gettimeofday(struct timeval *, struct timezone *) | 201 | int gettimeofday(struct timeval *, struct timezone *) |
162 | __attribute__((weak, alias("__vdso_gettimeofday"))); | 202 | __attribute__((weak, alias("__vdso_gettimeofday"))); |
163 | 203 | ||
164 | /* This will break when the xtime seconds get inaccurate, but that is | 204 | /* |
165 | * unlikely */ | 205 | * This will break when the xtime seconds get inaccurate, but that is |
166 | 206 | * unlikely | |
167 | static __always_inline long time_syscall(long *t) | 207 | */ |
168 | { | ||
169 | long secs; | ||
170 | asm volatile("syscall" | ||
171 | : "=a" (secs) | ||
172 | : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); | ||
173 | return secs; | ||
174 | } | ||
175 | |||
176 | notrace time_t __vdso_time(time_t *t) | 208 | notrace time_t __vdso_time(time_t *t) |
177 | { | 209 | { |
178 | time_t result; | ||
179 | |||
180 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) | ||
181 | return time_syscall(t); | ||
182 | |||
183 | /* This is atomic on x86_64 so we don't need any locks. */ | 210 | /* This is atomic on x86_64 so we don't need any locks. */ |
184 | result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); | 211 | time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); |
185 | 212 | ||
186 | if (t) | 213 | if (t) |
187 | *t = result; | 214 | *t = result; |
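The new vread_tsc() clamps the raw TSC value to cycle_last so a CPU whose counter reads slightly behind the timekeeper's snapshot can never make the vDSO clock step backwards, and the empty asm statement nudges GCC into a branch rather than a cmov on the rarely taken path. A rough userspace sketch of the clamp; __rdtsc() and _mm_lfence() are the standard GCC/Clang x86 intrinsics, and the shared last_cycles variable stands in for the vsyscall page's cycle_last:

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>	/* __rdtsc(), _mm_lfence() */

static uint64_t last_cycles;	/* updated by the "timekeeper" side */

static uint64_t read_tsc_clamped(void)
{
	uint64_t now, last;

	_mm_lfence();		/* keep rdtsc from moving above earlier loads */
	now = __rdtsc();
	last = last_cycles;

	if (now >= last)	/* overwhelmingly common case */
		return now;

	/*
	 * Rare case: this CPU's TSC reads slightly behind the value the
	 * timekeeper captured.  Returning 'last' keeps time monotonic.
	 */
	asm volatile ("");	/* discourage if-conversion into cmov */
	return last;
}

int main(void)
{
	last_cycles = __rdtsc();
	printf("clamped tsc: %llu\n", (unsigned long long)read_tsc_clamped());
	return 0;
}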
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 1d3aa6b87181..1b979c12ba85 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S | |||
@@ -1,10 +1,21 @@ | |||
1 | #include <asm/page_types.h> | ||
2 | #include <linux/linkage.h> | ||
1 | #include <linux/init.h> | 3 | #include <linux/init.h> |
2 | 4 | ||
3 | __INITDATA | 5 | __PAGE_ALIGNED_DATA |
4 | 6 | ||
5 | .globl vdso_start, vdso_end | 7 | .globl vdso_start, vdso_end |
8 | .align PAGE_SIZE | ||
6 | vdso_start: | 9 | vdso_start: |
7 | .incbin "arch/x86/vdso/vdso.so" | 10 | .incbin "arch/x86/vdso/vdso.so" |
8 | vdso_end: | 11 | vdso_end: |
9 | 12 | ||
10 | __FINIT | 13 | .previous |
14 | |||
15 | .globl vdso_pages | ||
16 | .bss | ||
17 | .align 8 | ||
18 | .type vdso_pages, @object | ||
19 | vdso_pages: | ||
20 | .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 | ||
21 | .size vdso_pages, .-vdso_pages | ||
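The vdso_pages reservation in .bss holds one 8-byte struct page pointer per page of the embedded image, so the .zero expression is simply a round-up division by PAGE_SIZE times the pointer size. The same arithmetic in plain C, with an invented image size:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long image_bytes = 9000;	/* stand-in for vdso_end - vdso_start */
	unsigned long npages = (image_bytes + PAGE_SIZE - 1) / PAGE_SIZE;

	/* one pointer slot per page, 8 bytes each on x86-64 */
	printf("%lu pages -> %lu bytes of pointer array\n",
	       npages, npages * 8);
	return 0;
}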
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 7abd2be0f9b9..316fbca3490e 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -14,41 +14,61 @@ | |||
14 | #include <asm/vgtod.h> | 14 | #include <asm/vgtod.h> |
15 | #include <asm/proto.h> | 15 | #include <asm/proto.h> |
16 | #include <asm/vdso.h> | 16 | #include <asm/vdso.h> |
17 | #include <asm/page.h> | ||
17 | 18 | ||
18 | unsigned int __read_mostly vdso_enabled = 1; | 19 | unsigned int __read_mostly vdso_enabled = 1; |
19 | 20 | ||
20 | extern char vdso_start[], vdso_end[]; | 21 | extern char vdso_start[], vdso_end[]; |
21 | extern unsigned short vdso_sync_cpuid; | 22 | extern unsigned short vdso_sync_cpuid; |
22 | 23 | ||
23 | static struct page **vdso_pages; | 24 | extern struct page *vdso_pages[]; |
24 | static unsigned vdso_size; | 25 | static unsigned vdso_size; |
25 | 26 | ||
26 | static int __init init_vdso_vars(void) | 27 | static void __init patch_vdso(void *vdso, size_t len) |
28 | { | ||
29 | Elf64_Ehdr *hdr = vdso; | ||
30 | Elf64_Shdr *sechdrs, *alt_sec = 0; | ||
31 | char *secstrings; | ||
32 | void *alt_data; | ||
33 | int i; | ||
34 | |||
35 | BUG_ON(len < sizeof(Elf64_Ehdr)); | ||
36 | BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); | ||
37 | |||
38 | sechdrs = (void *)hdr + hdr->e_shoff; | ||
39 | secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | ||
40 | |||
41 | for (i = 1; i < hdr->e_shnum; i++) { | ||
42 | Elf64_Shdr *shdr = &sechdrs[i]; | ||
43 | if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { | ||
44 | alt_sec = shdr; | ||
45 | goto found; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | /* If we get here, it's probably a bug. */ | ||
50 | pr_warning("patch_vdso: .altinstructions not found\n"); | ||
51 | return; /* nothing to patch */ | ||
52 | |||
53 | found: | ||
54 | alt_data = (void *)hdr + alt_sec->sh_offset; | ||
55 | apply_alternatives(alt_data, alt_data + alt_sec->sh_size); | ||
56 | } | ||
57 | |||
58 | static int __init init_vdso(void) | ||
27 | { | 59 | { |
28 | int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; | 60 | int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; |
29 | int i; | 61 | int i; |
30 | 62 | ||
63 | patch_vdso(vdso_start, vdso_end - vdso_start); | ||
64 | |||
31 | vdso_size = npages << PAGE_SHIFT; | 65 | vdso_size = npages << PAGE_SHIFT; |
32 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); | 66 | for (i = 0; i < npages; i++) |
33 | if (!vdso_pages) | 67 | vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); |
34 | goto oom; | ||
35 | for (i = 0; i < npages; i++) { | ||
36 | struct page *p; | ||
37 | p = alloc_page(GFP_KERNEL); | ||
38 | if (!p) | ||
39 | goto oom; | ||
40 | vdso_pages[i] = p; | ||
41 | copy_page(page_address(p), vdso_start + i*PAGE_SIZE); | ||
42 | } | ||
43 | 68 | ||
44 | return 0; | 69 | return 0; |
45 | |||
46 | oom: | ||
47 | printk("Cannot allocate vdso\n"); | ||
48 | vdso_enabled = 0; | ||
49 | return -ENOMEM; | ||
50 | } | 70 | } |
51 | subsys_initcall(init_vdso_vars); | 71 | subsys_initcall(init_vdso); |
52 | 72 | ||
53 | struct linux_binprm; | 73 | struct linux_binprm; |
54 | 74 | ||
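patch_vdso() finds .altinstructions by walking the ELF section header table: e_shoff locates the header array and e_shstrndx names the section holding the section-name strings. A userspace sketch of the same walk over an ELF file on disk (error handling trimmed, assumes a 64-bit ELF):

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

static void *find_section(void *image, const char *name, size_t *len)
{
	Elf64_Ehdr *hdr = image;
	Elf64_Shdr *sechdrs = (void *)((char *)image + hdr->e_shoff);
	const char *secstrings =
		(char *)image + sechdrs[hdr->e_shstrndx].sh_offset;

	for (int i = 1; i < hdr->e_shnum; i++) {
		if (!strcmp(secstrings + sechdrs[i].sh_name, name)) {
			*len = sechdrs[i].sh_size;
			return (char *)image + sechdrs[i].sh_offset;
		}
	}
	return NULL;
}

int main(int argc, char **argv)
{
	if (argc < 3) {
		fprintf(stderr, "usage: %s <elf-file> <section>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY);
	struct stat st;
	fstat(fd, &st);
	void *image = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

	size_t len;
	void *sec = find_section(image, argv[2], &len);
	if (sec)
		printf("%s: %zu bytes at offset %td\n",
		       argv[2], len, (char *)sec - (char *)image);
	else
		printf("%s: not found\n", argv[2]);
	return 0;
}

In the kernel the image is the built-in copy of vdso.so and the section found is handed straight to apply_alternatives(), which is why the old "alternative() doesn't work" caveat could be dropped from vclock_gettime.c.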
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 17c565de3d64..ccf73b2f3e69 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -13,10 +13,10 @@ CFLAGS_mmu.o := $(nostackp) | |||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o platform-pci-unplug.o \ | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | 16 | p2m.o trace.o |
17 | 17 | ||
18 | obj-$(CONFIG_SMP) += smp.o | 18 | obj-$(CONFIG_SMP) += smp.o |
19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
20 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o | 20 | obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o |
21 | 21 | obj-$(CONFIG_XEN_DOM0) += vga.o | |
22 | obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o | 22 | obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5525163a0398..974a528458a0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -341,6 +341,8 @@ static void xen_set_ldt(const void *addr, unsigned entries) | |||
341 | struct mmuext_op *op; | 341 | struct mmuext_op *op; |
342 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | 342 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); |
343 | 343 | ||
344 | trace_xen_cpu_set_ldt(addr, entries); | ||
345 | |||
344 | op = mcs.args; | 346 | op = mcs.args; |
345 | op->cmd = MMUEXT_SET_LDT; | 347 | op->cmd = MMUEXT_SET_LDT; |
346 | op->arg1.linear_addr = (unsigned long)addr; | 348 | op->arg1.linear_addr = (unsigned long)addr; |
@@ -496,6 +498,8 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
496 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); | 498 | xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]); |
497 | u64 entry = *(u64 *)ptr; | 499 | u64 entry = *(u64 *)ptr; |
498 | 500 | ||
501 | trace_xen_cpu_write_ldt_entry(dt, entrynum, entry); | ||
502 | |||
499 | preempt_disable(); | 503 | preempt_disable(); |
500 | 504 | ||
501 | xen_mc_flush(); | 505 | xen_mc_flush(); |
@@ -565,6 +569,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) | |||
565 | unsigned long p = (unsigned long)&dt[entrynum]; | 569 | unsigned long p = (unsigned long)&dt[entrynum]; |
566 | unsigned long start, end; | 570 | unsigned long start, end; |
567 | 571 | ||
572 | trace_xen_cpu_write_idt_entry(dt, entrynum, g); | ||
573 | |||
568 | preempt_disable(); | 574 | preempt_disable(); |
569 | 575 | ||
570 | start = __this_cpu_read(idt_desc.address); | 576 | start = __this_cpu_read(idt_desc.address); |
@@ -619,6 +625,8 @@ static void xen_load_idt(const struct desc_ptr *desc) | |||
619 | static DEFINE_SPINLOCK(lock); | 625 | static DEFINE_SPINLOCK(lock); |
620 | static struct trap_info traps[257]; | 626 | static struct trap_info traps[257]; |
621 | 627 | ||
628 | trace_xen_cpu_load_idt(desc); | ||
629 | |||
622 | spin_lock(&lock); | 630 | spin_lock(&lock); |
623 | 631 | ||
624 | __get_cpu_var(idt_desc) = *desc; | 632 | __get_cpu_var(idt_desc) = *desc; |
@@ -637,6 +645,8 @@ static void xen_load_idt(const struct desc_ptr *desc) | |||
637 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | 645 | static void xen_write_gdt_entry(struct desc_struct *dt, int entry, |
638 | const void *desc, int type) | 646 | const void *desc, int type) |
639 | { | 647 | { |
648 | trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); | ||
649 | |||
640 | preempt_disable(); | 650 | preempt_disable(); |
641 | 651 | ||
642 | switch (type) { | 652 | switch (type) { |
@@ -665,6 +675,8 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, | |||
665 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | 675 | static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, |
666 | const void *desc, int type) | 676 | const void *desc, int type) |
667 | { | 677 | { |
678 | trace_xen_cpu_write_gdt_entry(dt, entry, desc, type); | ||
679 | |||
668 | switch (type) { | 680 | switch (type) { |
669 | case DESC_LDT: | 681 | case DESC_LDT: |
670 | case DESC_TSS: | 682 | case DESC_TSS: |
@@ -684,7 +696,9 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | |||
684 | static void xen_load_sp0(struct tss_struct *tss, | 696 | static void xen_load_sp0(struct tss_struct *tss, |
685 | struct thread_struct *thread) | 697 | struct thread_struct *thread) |
686 | { | 698 | { |
687 | struct multicall_space mcs = xen_mc_entry(0); | 699 | struct multicall_space mcs; |
700 | |||
701 | mcs = xen_mc_entry(0); | ||
688 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | 702 | MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
689 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 703 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
690 | } | 704 | } |
@@ -1248,6 +1262,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1248 | if (pci_xen) | 1262 | if (pci_xen) |
1249 | x86_init.pci.arch_init = pci_xen_init; | 1263 | x86_init.pci.arch_init = pci_xen_init; |
1250 | } else { | 1264 | } else { |
1265 | const struct dom0_vga_console_info *info = | ||
1266 | (void *)((char *)xen_start_info + | ||
1267 | xen_start_info->console.dom0.info_off); | ||
1268 | |||
1269 | xen_init_vga(info, xen_start_info->console.dom0.info_size); | ||
1270 | xen_start_info->console.domU.mfn = 0; | ||
1271 | xen_start_info->console.domU.evtchn = 0; | ||
1272 | |||
1251 | /* Make sure ACS will be enabled */ | 1273 | /* Make sure ACS will be enabled */ |
1252 | pci_request_acs(); | 1274 | pci_request_acs(); |
1253 | } | 1275 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0ccccb67a993..f987bde77c49 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -48,6 +48,8 @@ | |||
48 | #include <linux/memblock.h> | 48 | #include <linux/memblock.h> |
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | 50 | ||
51 | #include <trace/events/xen.h> | ||
52 | |||
51 | #include <asm/pgtable.h> | 53 | #include <asm/pgtable.h> |
52 | #include <asm/tlbflush.h> | 54 | #include <asm/tlbflush.h> |
53 | #include <asm/fixmap.h> | 55 | #include <asm/fixmap.h> |
@@ -194,6 +196,8 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid) | |||
194 | struct multicall_space mcs; | 196 | struct multicall_space mcs; |
195 | struct mmu_update *u; | 197 | struct mmu_update *u; |
196 | 198 | ||
199 | trace_xen_mmu_set_domain_pte(ptep, pteval, domid); | ||
200 | |||
197 | mcs = xen_mc_entry(sizeof(*u)); | 201 | mcs = xen_mc_entry(sizeof(*u)); |
198 | u = mcs.args; | 202 | u = mcs.args; |
199 | 203 | ||
@@ -225,6 +229,24 @@ static void xen_extend_mmu_update(const struct mmu_update *update) | |||
225 | *u = *update; | 229 | *u = *update; |
226 | } | 230 | } |
227 | 231 | ||
232 | static void xen_extend_mmuext_op(const struct mmuext_op *op) | ||
233 | { | ||
234 | struct multicall_space mcs; | ||
235 | struct mmuext_op *u; | ||
236 | |||
237 | mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u)); | ||
238 | |||
239 | if (mcs.mc != NULL) { | ||
240 | mcs.mc->args[1]++; | ||
241 | } else { | ||
242 | mcs = __xen_mc_entry(sizeof(*u)); | ||
243 | MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
244 | } | ||
245 | |||
246 | u = mcs.args; | ||
247 | *u = *op; | ||
248 | } | ||
249 | |||
228 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | 250 | static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) |
229 | { | 251 | { |
230 | struct mmu_update u; | 252 | struct mmu_update u; |
@@ -245,6 +267,8 @@ static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
245 | 267 | ||
246 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) | 268 | static void xen_set_pmd(pmd_t *ptr, pmd_t val) |
247 | { | 269 | { |
270 | trace_xen_mmu_set_pmd(ptr, val); | ||
271 | |||
248 | /* If page is not pinned, we can just update the entry | 272 | /* If page is not pinned, we can just update the entry |
249 | directly */ | 273 | directly */ |
250 | if (!xen_page_pinned(ptr)) { | 274 | if (!xen_page_pinned(ptr)) { |
@@ -282,22 +306,30 @@ static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval) | |||
282 | return true; | 306 | return true; |
283 | } | 307 | } |
284 | 308 | ||
285 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | 309 | static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) |
286 | { | 310 | { |
287 | if (!xen_batched_set_pte(ptep, pteval)) | 311 | if (!xen_batched_set_pte(ptep, pteval)) |
288 | native_set_pte(ptep, pteval); | 312 | native_set_pte(ptep, pteval); |
289 | } | 313 | } |
290 | 314 | ||
315 | static void xen_set_pte(pte_t *ptep, pte_t pteval) | ||
316 | { | ||
317 | trace_xen_mmu_set_pte(ptep, pteval); | ||
318 | __xen_set_pte(ptep, pteval); | ||
319 | } | ||
320 | |||
291 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 321 | static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
292 | pte_t *ptep, pte_t pteval) | 322 | pte_t *ptep, pte_t pteval) |
293 | { | 323 | { |
294 | xen_set_pte(ptep, pteval); | 324 | trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval); |
325 | __xen_set_pte(ptep, pteval); | ||
295 | } | 326 | } |
296 | 327 | ||
297 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 328 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
298 | unsigned long addr, pte_t *ptep) | 329 | unsigned long addr, pte_t *ptep) |
299 | { | 330 | { |
300 | /* Just return the pte as-is. We preserve the bits on commit */ | 331 | /* Just return the pte as-is. We preserve the bits on commit */ |
332 | trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep); | ||
301 | return *ptep; | 333 | return *ptep; |
302 | } | 334 | } |
303 | 335 | ||
@@ -306,6 +338,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
306 | { | 338 | { |
307 | struct mmu_update u; | 339 | struct mmu_update u; |
308 | 340 | ||
341 | trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte); | ||
309 | xen_mc_batch(); | 342 | xen_mc_batch(); |
310 | 343 | ||
311 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; | 344 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
@@ -530,6 +563,8 @@ static void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
530 | 563 | ||
531 | static void xen_set_pud(pud_t *ptr, pud_t val) | 564 | static void xen_set_pud(pud_t *ptr, pud_t val) |
532 | { | 565 | { |
566 | trace_xen_mmu_set_pud(ptr, val); | ||
567 | |||
533 | /* If page is not pinned, we can just update the entry | 568 | /* If page is not pinned, we can just update the entry |
534 | directly */ | 569 | directly */ |
535 | if (!xen_page_pinned(ptr)) { | 570 | if (!xen_page_pinned(ptr)) { |
@@ -543,17 +578,20 @@ static void xen_set_pud(pud_t *ptr, pud_t val) | |||
543 | #ifdef CONFIG_X86_PAE | 578 | #ifdef CONFIG_X86_PAE |
544 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | 579 | static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
545 | { | 580 | { |
581 | trace_xen_mmu_set_pte_atomic(ptep, pte); | ||
546 | set_64bit((u64 *)ptep, native_pte_val(pte)); | 582 | set_64bit((u64 *)ptep, native_pte_val(pte)); |
547 | } | 583 | } |
548 | 584 | ||
549 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 585 | static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
550 | { | 586 | { |
587 | trace_xen_mmu_pte_clear(mm, addr, ptep); | ||
551 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) | 588 | if (!xen_batched_set_pte(ptep, native_make_pte(0))) |
552 | native_pte_clear(mm, addr, ptep); | 589 | native_pte_clear(mm, addr, ptep); |
553 | } | 590 | } |
554 | 591 | ||
555 | static void xen_pmd_clear(pmd_t *pmdp) | 592 | static void xen_pmd_clear(pmd_t *pmdp) |
556 | { | 593 | { |
594 | trace_xen_mmu_pmd_clear(pmdp); | ||
557 | set_pmd(pmdp, __pmd(0)); | 595 | set_pmd(pmdp, __pmd(0)); |
558 | } | 596 | } |
559 | #endif /* CONFIG_X86_PAE */ | 597 | #endif /* CONFIG_X86_PAE */ |
@@ -629,6 +667,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) | |||
629 | { | 667 | { |
630 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | 668 | pgd_t *user_ptr = xen_get_user_pgd(ptr); |
631 | 669 | ||
670 | trace_xen_mmu_set_pgd(ptr, user_ptr, val); | ||
671 | |||
632 | /* If page is not pinned, we can just update the entry | 672 | /* If page is not pinned, we can just update the entry |
633 | directly */ | 673 | directly */ |
634 | if (!xen_page_pinned(ptr)) { | 674 | if (!xen_page_pinned(ptr)) { |
@@ -788,14 +828,12 @@ static void xen_pte_unlock(void *v) | |||
788 | 828 | ||
789 | static void xen_do_pin(unsigned level, unsigned long pfn) | 829 | static void xen_do_pin(unsigned level, unsigned long pfn) |
790 | { | 830 | { |
791 | struct mmuext_op *op; | 831 | struct mmuext_op op; |
792 | struct multicall_space mcs; | ||
793 | 832 | ||
794 | mcs = __xen_mc_entry(sizeof(*op)); | 833 | op.cmd = level; |
795 | op = mcs.args; | 834 | op.arg1.mfn = pfn_to_mfn(pfn); |
796 | op->cmd = level; | 835 | |
797 | op->arg1.mfn = pfn_to_mfn(pfn); | 836 | xen_extend_mmuext_op(&op); |
798 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | ||
799 | } | 837 | } |
800 | 838 | ||
801 | static int xen_pin_page(struct mm_struct *mm, struct page *page, | 839 | static int xen_pin_page(struct mm_struct *mm, struct page *page, |
@@ -863,6 +901,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, | |||
863 | read-only, and can be pinned. */ | 901 | read-only, and can be pinned. */ |
864 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) | 902 | static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) |
865 | { | 903 | { |
904 | trace_xen_mmu_pgd_pin(mm, pgd); | ||
905 | |||
866 | xen_mc_batch(); | 906 | xen_mc_batch(); |
867 | 907 | ||
868 | if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { | 908 | if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { |
@@ -988,6 +1028,8 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, | |||
988 | /* Release a pagetable's pages back as normal RW */ | 1028 | /* Release a pagetable's pages back as normal RW */ |
989 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) | 1029 | static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) |
990 | { | 1030 | { |
1031 | trace_xen_mmu_pgd_unpin(mm, pgd); | ||
1032 | |||
991 | xen_mc_batch(); | 1033 | xen_mc_batch(); |
992 | 1034 | ||
993 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 1035 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
@@ -1196,6 +1238,8 @@ static void xen_flush_tlb(void) | |||
1196 | struct mmuext_op *op; | 1238 | struct mmuext_op *op; |
1197 | struct multicall_space mcs; | 1239 | struct multicall_space mcs; |
1198 | 1240 | ||
1241 | trace_xen_mmu_flush_tlb(0); | ||
1242 | |||
1199 | preempt_disable(); | 1243 | preempt_disable(); |
1200 | 1244 | ||
1201 | mcs = xen_mc_entry(sizeof(*op)); | 1245 | mcs = xen_mc_entry(sizeof(*op)); |
@@ -1214,6 +1258,8 @@ static void xen_flush_tlb_single(unsigned long addr) | |||
1214 | struct mmuext_op *op; | 1258 | struct mmuext_op *op; |
1215 | struct multicall_space mcs; | 1259 | struct multicall_space mcs; |
1216 | 1260 | ||
1261 | trace_xen_mmu_flush_tlb_single(addr); | ||
1262 | |||
1217 | preempt_disable(); | 1263 | preempt_disable(); |
1218 | 1264 | ||
1219 | mcs = xen_mc_entry(sizeof(*op)); | 1265 | mcs = xen_mc_entry(sizeof(*op)); |
@@ -1240,6 +1286,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1240 | } *args; | 1286 | } *args; |
1241 | struct multicall_space mcs; | 1287 | struct multicall_space mcs; |
1242 | 1288 | ||
1289 | trace_xen_mmu_flush_tlb_others(cpus, mm, va); | ||
1290 | |||
1243 | if (cpumask_empty(cpus)) | 1291 | if (cpumask_empty(cpus)) |
1244 | return; /* nothing to do */ | 1292 | return; /* nothing to do */ |
1245 | 1293 | ||
@@ -1275,10 +1323,11 @@ static void set_current_cr3(void *v) | |||
1275 | 1323 | ||
1276 | static void __xen_write_cr3(bool kernel, unsigned long cr3) | 1324 | static void __xen_write_cr3(bool kernel, unsigned long cr3) |
1277 | { | 1325 | { |
1278 | struct mmuext_op *op; | 1326 | struct mmuext_op op; |
1279 | struct multicall_space mcs; | ||
1280 | unsigned long mfn; | 1327 | unsigned long mfn; |
1281 | 1328 | ||
1329 | trace_xen_mmu_write_cr3(kernel, cr3); | ||
1330 | |||
1282 | if (cr3) | 1331 | if (cr3) |
1283 | mfn = pfn_to_mfn(PFN_DOWN(cr3)); | 1332 | mfn = pfn_to_mfn(PFN_DOWN(cr3)); |
1284 | else | 1333 | else |
@@ -1286,13 +1335,10 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) | |||
1286 | 1335 | ||
1287 | WARN_ON(mfn == 0 && kernel); | 1336 | WARN_ON(mfn == 0 && kernel); |
1288 | 1337 | ||
1289 | mcs = __xen_mc_entry(sizeof(*op)); | 1338 | op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; |
1290 | 1339 | op.arg1.mfn = mfn; | |
1291 | op = mcs.args; | ||
1292 | op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; | ||
1293 | op->arg1.mfn = mfn; | ||
1294 | 1340 | ||
1295 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 1341 | xen_extend_mmuext_op(&op); |
1296 | 1342 | ||
1297 | if (kernel) { | 1343 | if (kernel) { |
1298 | percpu_write(xen_cr3, cr3); | 1344 | percpu_write(xen_cr3, cr3); |
@@ -1451,19 +1497,52 @@ static void __init xen_release_pmd_init(unsigned long pfn) | |||
1451 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1497 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
1452 | } | 1498 | } |
1453 | 1499 | ||
1500 | static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | ||
1501 | { | ||
1502 | struct multicall_space mcs; | ||
1503 | struct mmuext_op *op; | ||
1504 | |||
1505 | mcs = __xen_mc_entry(sizeof(*op)); | ||
1506 | op = mcs.args; | ||
1507 | op->cmd = cmd; | ||
1508 | op->arg1.mfn = pfn_to_mfn(pfn); | ||
1509 | |||
1510 | MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
1511 | } | ||
1512 | |||
1513 | static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) | ||
1514 | { | ||
1515 | struct multicall_space mcs; | ||
1516 | unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
1517 | |||
1518 | mcs = __xen_mc_entry(0); | ||
1519 | MULTI_update_va_mapping(mcs.mc, (unsigned long)addr, | ||
1520 | pfn_pte(pfn, prot), 0); | ||
1521 | } | ||
1522 | |||
1454 | /* This needs to make sure the new pte page is pinned iff it's being | 1523 | /* This needs to make sure the new pte page is pinned iff it's being |
1455 | attached to a pinned pagetable. */ | 1524 | attached to a pinned pagetable. */ |
1456 | static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) | 1525 | static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, |
1526 | unsigned level) | ||
1457 | { | 1527 | { |
1458 | struct page *page = pfn_to_page(pfn); | 1528 | bool pinned = PagePinned(virt_to_page(mm->pgd)); |
1529 | |||
1530 | trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); | ||
1531 | |||
1532 | if (pinned) { | ||
1533 | struct page *page = pfn_to_page(pfn); | ||
1459 | 1534 | ||
1460 | if (PagePinned(virt_to_page(mm->pgd))) { | ||
1461 | SetPagePinned(page); | 1535 | SetPagePinned(page); |
1462 | 1536 | ||
1463 | if (!PageHighMem(page)) { | 1537 | if (!PageHighMem(page)) { |
1464 | make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); | 1538 | xen_mc_batch(); |
1539 | |||
1540 | __set_pfn_prot(pfn, PAGE_KERNEL_RO); | ||
1541 | |||
1465 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) | 1542 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
1466 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); | 1543 | __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
1544 | |||
1545 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1467 | } else { | 1546 | } else { |
1468 | /* make sure there are no stray mappings of | 1547 | /* make sure there are no stray mappings of |
1469 | this page */ | 1548 | this page */ |
@@ -1483,15 +1562,23 @@ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) | |||
1483 | } | 1562 | } |
1484 | 1563 | ||
1485 | /* This should never happen until we're OK to use struct page */ | 1564 | /* This should never happen until we're OK to use struct page */ |
1486 | static void xen_release_ptpage(unsigned long pfn, unsigned level) | 1565 | static inline void xen_release_ptpage(unsigned long pfn, unsigned level) |
1487 | { | 1566 | { |
1488 | struct page *page = pfn_to_page(pfn); | 1567 | struct page *page = pfn_to_page(pfn); |
1568 | bool pinned = PagePinned(page); | ||
1489 | 1569 | ||
1490 | if (PagePinned(page)) { | 1570 | trace_xen_mmu_release_ptpage(pfn, level, pinned); |
1571 | |||
1572 | if (pinned) { | ||
1491 | if (!PageHighMem(page)) { | 1573 | if (!PageHighMem(page)) { |
1574 | xen_mc_batch(); | ||
1575 | |||
1492 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) | 1576 | if (level == PT_PTE && USE_SPLIT_PTLOCKS) |
1493 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | 1577 | __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); |
1494 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 1578 | |
1579 | __set_pfn_prot(pfn, PAGE_KERNEL); | ||
1580 | |||
1581 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
1495 | } | 1582 | } |
1496 | ClearPagePinned(page); | 1583 | ClearPagePinned(page); |
1497 | } | 1584 | } |
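xen_extend_mmuext_op() folds back-to-back MMUEXT operations into one multicall entry: if the most recent entry is already an mmuext_op hypercall it appends another op to that entry's argument area and bumps its count, otherwise it opens a fresh entry. A simplified sketch of the append-if-compatible logic; the structures are stand-ins, not the Xen ABI:

#include <stdio.h>

#define MAX_ENTRIES       4
#define MAX_OPS_PER_ENTRY 8

struct op { int cmd; unsigned long mfn; };

/* One "multicall entry": a hypercall kind plus its batched arguments. */
struct entry {
	int kind;
	int nops;
	struct op ops[MAX_OPS_PER_ENTRY];
};

static struct entry entries[MAX_ENTRIES];
static int nentries;

static void flush(void)
{
	for (int i = 0; i < nentries; i++)
		printf("entry %d: kind %d with %d op(s)\n",
		       i, entries[i].kind, entries[i].nops);
	nentries = 0;
}

static void extend_or_add(int kind, struct op op)
{
	struct entry *e = nentries ? &entries[nentries - 1] : NULL;

	/* Append to the previous entry only if it is the same hypercall kind
	 * and still has argument space; otherwise open a fresh entry. */
	if (!e || e->kind != kind || e->nops == MAX_OPS_PER_ENTRY) {
		if (nentries == MAX_ENTRIES)
			flush();
		e = &entries[nentries++];
		e->kind = kind;
		e->nops = 0;
	}
	e->ops[e->nops++] = op;
}

int main(void)
{
	extend_or_add(1, (struct op){ .cmd = 10, .mfn = 0x100 });
	extend_or_add(1, (struct op){ .cmd = 11, .mfn = 0x101 });	/* extends */
	extend_or_add(2, (struct op){ .cmd = 20, .mfn = 0x200 });	/* new entry */
	flush();
	return 0;
}

xen_alloc_ptpage()/xen_release_ptpage() lean on the same machinery: wrapping __set_pfn_prot() and __pin_pagetable_pfn() in xen_mc_batch()/xen_mc_issue() lets the permission flip and the (un)pin go out in a single batch instead of two separate hypercalls.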
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 1b2b73ff0a6e..0d82003e76ad 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -30,12 +30,13 @@ | |||
30 | 30 | ||
31 | #define MC_BATCH 32 | 31 | #define MC_BATCH 32 |
32 | 32 | ||
33 | #define MC_DEBUG 1 | 33 | #define MC_DEBUG 0 |
34 | 34 | ||
35 | #define MC_ARGS (MC_BATCH * 16) | 35 | #define MC_ARGS (MC_BATCH * 16) |
36 | 36 | ||
37 | 37 | ||
38 | struct mc_buffer { | 38 | struct mc_buffer { |
39 | unsigned mcidx, argidx, cbidx; | ||
39 | struct multicall_entry entries[MC_BATCH]; | 40 | struct multicall_entry entries[MC_BATCH]; |
40 | #if MC_DEBUG | 41 | #if MC_DEBUG |
41 | struct multicall_entry debug[MC_BATCH]; | 42 | struct multicall_entry debug[MC_BATCH]; |
@@ -46,85 +47,15 @@ struct mc_buffer { | |||
46 | void (*fn)(void *); | 47 | void (*fn)(void *); |
47 | void *data; | 48 | void *data; |
48 | } callbacks[MC_BATCH]; | 49 | } callbacks[MC_BATCH]; |
49 | unsigned mcidx, argidx, cbidx; | ||
50 | }; | 50 | }; |
51 | 51 | ||
52 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); | 52 | static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); |
53 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); | 53 | DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); |
54 | 54 | ||
55 | /* flush reasons 0- slots, 1- args, 2- callbacks */ | ||
56 | enum flush_reasons | ||
57 | { | ||
58 | FL_SLOTS, | ||
59 | FL_ARGS, | ||
60 | FL_CALLBACKS, | ||
61 | |||
62 | FL_N_REASONS | ||
63 | }; | ||
64 | |||
65 | #ifdef CONFIG_XEN_DEBUG_FS | ||
66 | #define NHYPERCALLS 40 /* not really */ | ||
67 | |||
68 | static struct { | ||
69 | unsigned histo[MC_BATCH+1]; | ||
70 | |||
71 | unsigned issued; | ||
72 | unsigned arg_total; | ||
73 | unsigned hypercalls; | ||
74 | unsigned histo_hypercalls[NHYPERCALLS]; | ||
75 | |||
76 | unsigned flush[FL_N_REASONS]; | ||
77 | } mc_stats; | ||
78 | |||
79 | static u8 zero_stats; | ||
80 | |||
81 | static inline void check_zero(void) | ||
82 | { | ||
83 | if (unlikely(zero_stats)) { | ||
84 | memset(&mc_stats, 0, sizeof(mc_stats)); | ||
85 | zero_stats = 0; | ||
86 | } | ||
87 | } | ||
88 | |||
89 | static void mc_add_stats(const struct mc_buffer *mc) | ||
90 | { | ||
91 | int i; | ||
92 | |||
93 | check_zero(); | ||
94 | |||
95 | mc_stats.issued++; | ||
96 | mc_stats.hypercalls += mc->mcidx; | ||
97 | mc_stats.arg_total += mc->argidx; | ||
98 | |||
99 | mc_stats.histo[mc->mcidx]++; | ||
100 | for(i = 0; i < mc->mcidx; i++) { | ||
101 | unsigned op = mc->entries[i].op; | ||
102 | if (op < NHYPERCALLS) | ||
103 | mc_stats.histo_hypercalls[op]++; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static void mc_stats_flush(enum flush_reasons idx) | ||
108 | { | ||
109 | check_zero(); | ||
110 | |||
111 | mc_stats.flush[idx]++; | ||
112 | } | ||
113 | |||
114 | #else /* !CONFIG_XEN_DEBUG_FS */ | ||
115 | |||
116 | static inline void mc_add_stats(const struct mc_buffer *mc) | ||
117 | { | ||
118 | } | ||
119 | |||
120 | static inline void mc_stats_flush(enum flush_reasons idx) | ||
121 | { | ||
122 | } | ||
123 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
124 | |||
125 | void xen_mc_flush(void) | 55 | void xen_mc_flush(void) |
126 | { | 56 | { |
127 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 57 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
58 | struct multicall_entry *mc; | ||
128 | int ret = 0; | 59 | int ret = 0; |
129 | unsigned long flags; | 60 | unsigned long flags; |
130 | int i; | 61 | int i; |
@@ -135,9 +66,26 @@ void xen_mc_flush(void) | |||
135 | something in the middle */ | 66 | something in the middle */ |
136 | local_irq_save(flags); | 67 | local_irq_save(flags); |
137 | 68 | ||
138 | mc_add_stats(b); | 69 | trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); |
70 | |||
71 | switch (b->mcidx) { | ||
72 | case 0: | ||
73 | /* no-op */ | ||
74 | BUG_ON(b->argidx != 0); | ||
75 | break; | ||
76 | |||
77 | case 1: | ||
78 | /* Singleton multicall - bypass multicall machinery | ||
79 | and just do the call directly. */ | ||
80 | mc = &b->entries[0]; | ||
81 | |||
82 | mc->result = privcmd_call(mc->op, | ||
83 | mc->args[0], mc->args[1], mc->args[2], | ||
84 | mc->args[3], mc->args[4]); | ||
85 | ret = mc->result < 0; | ||
86 | break; | ||
139 | 87 | ||
140 | if (b->mcidx) { | 88 | default: |
141 | #if MC_DEBUG | 89 | #if MC_DEBUG |
142 | memcpy(b->debug, b->entries, | 90 | memcpy(b->debug, b->entries, |
143 | b->mcidx * sizeof(struct multicall_entry)); | 91 | b->mcidx * sizeof(struct multicall_entry)); |
@@ -164,11 +112,10 @@ void xen_mc_flush(void) | |||
164 | } | 112 | } |
165 | } | 113 | } |
166 | #endif | 114 | #endif |
115 | } | ||
167 | 116 | ||
168 | b->mcidx = 0; | 117 | b->mcidx = 0; |
169 | b->argidx = 0; | 118 | b->argidx = 0; |
170 | } else | ||
171 | BUG_ON(b->argidx != 0); | ||
172 | 119 | ||
173 | for (i = 0; i < b->cbidx; i++) { | 120 | for (i = 0; i < b->cbidx; i++) { |
174 | struct callback *cb = &b->callbacks[i]; | 121 | struct callback *cb = &b->callbacks[i]; |
@@ -188,18 +135,21 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
188 | struct multicall_space ret; | 135 | struct multicall_space ret; |
189 | unsigned argidx = roundup(b->argidx, sizeof(u64)); | 136 | unsigned argidx = roundup(b->argidx, sizeof(u64)); |
190 | 137 | ||
138 | trace_xen_mc_entry_alloc(args); | ||
139 | |||
191 | BUG_ON(preemptible()); | 140 | BUG_ON(preemptible()); |
192 | BUG_ON(b->argidx >= MC_ARGS); | 141 | BUG_ON(b->argidx >= MC_ARGS); |
193 | 142 | ||
194 | if (b->mcidx == MC_BATCH || | 143 | if (unlikely(b->mcidx == MC_BATCH || |
195 | (argidx + args) >= MC_ARGS) { | 144 | (argidx + args) >= MC_ARGS)) { |
196 | mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS); | 145 | trace_xen_mc_flush_reason((b->mcidx == MC_BATCH) ? |
146 | XEN_MC_FL_BATCH : XEN_MC_FL_ARGS); | ||
197 | xen_mc_flush(); | 147 | xen_mc_flush(); |
198 | argidx = roundup(b->argidx, sizeof(u64)); | 148 | argidx = roundup(b->argidx, sizeof(u64)); |
199 | } | 149 | } |
200 | 150 | ||
201 | ret.mc = &b->entries[b->mcidx]; | 151 | ret.mc = &b->entries[b->mcidx]; |
202 | #ifdef MC_DEBUG | 152 | #if MC_DEBUG |
203 | b->caller[b->mcidx] = __builtin_return_address(0); | 153 | b->caller[b->mcidx] = __builtin_return_address(0); |
204 | #endif | 154 | #endif |
205 | b->mcidx++; | 155 | b->mcidx++; |
@@ -218,20 +168,25 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) | |||
218 | BUG_ON(preemptible()); | 168 | BUG_ON(preemptible()); |
219 | BUG_ON(b->argidx >= MC_ARGS); | 169 | BUG_ON(b->argidx >= MC_ARGS); |
220 | 170 | ||
221 | if (b->mcidx == 0) | 171 | if (unlikely(b->mcidx == 0 || |
222 | return ret; | 172 | b->entries[b->mcidx - 1].op != op)) { |
223 | 173 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_BAD_OP); | |
224 | if (b->entries[b->mcidx - 1].op != op) | 174 | goto out; |
225 | return ret; | 175 | } |
226 | 176 | ||
227 | if ((b->argidx + size) >= MC_ARGS) | 177 | if (unlikely((b->argidx + size) >= MC_ARGS)) { |
228 | return ret; | 178 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_NO_SPACE); |
179 | goto out; | ||
180 | } | ||
229 | 181 | ||
230 | ret.mc = &b->entries[b->mcidx - 1]; | 182 | ret.mc = &b->entries[b->mcidx - 1]; |
231 | ret.args = &b->args[b->argidx]; | 183 | ret.args = &b->args[b->argidx]; |
232 | b->argidx += size; | 184 | b->argidx += size; |
233 | 185 | ||
234 | BUG_ON(b->argidx >= MC_ARGS); | 186 | BUG_ON(b->argidx >= MC_ARGS); |
187 | |||
188 | trace_xen_mc_extend_args(op, size, XEN_MC_XE_OK); | ||
189 | out: | ||
235 | return ret; | 190 | return ret; |
236 | } | 191 | } |
237 | 192 | ||
@@ -241,43 +196,13 @@ void xen_mc_callback(void (*fn)(void *), void *data) | |||
241 | struct callback *cb; | 196 | struct callback *cb; |
242 | 197 | ||
243 | if (b->cbidx == MC_BATCH) { | 198 | if (b->cbidx == MC_BATCH) { |
244 | mc_stats_flush(FL_CALLBACKS); | 199 | trace_xen_mc_flush_reason(XEN_MC_FL_CALLBACK); |
245 | xen_mc_flush(); | 200 | xen_mc_flush(); |
246 | } | 201 | } |
247 | 202 | ||
203 | trace_xen_mc_callback(fn, data); | ||
204 | |||
248 | cb = &b->callbacks[b->cbidx++]; | 205 | cb = &b->callbacks[b->cbidx++]; |
249 | cb->fn = fn; | 206 | cb->fn = fn; |
250 | cb->data = data; | 207 | cb->data = data; |
251 | } | 208 | } |
252 | |||
253 | #ifdef CONFIG_XEN_DEBUG_FS | ||
254 | |||
255 | static struct dentry *d_mc_debug; | ||
256 | |||
257 | static int __init xen_mc_debugfs(void) | ||
258 | { | ||
259 | struct dentry *d_xen = xen_init_debugfs(); | ||
260 | |||
261 | if (d_xen == NULL) | ||
262 | return -ENOMEM; | ||
263 | |||
264 | d_mc_debug = debugfs_create_dir("multicalls", d_xen); | ||
265 | |||
266 | debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats); | ||
267 | |||
268 | debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued); | ||
269 | debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls); | ||
270 | debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total); | ||
271 | |||
272 | xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug, | ||
273 | mc_stats.histo, MC_BATCH); | ||
274 | xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug, | ||
275 | mc_stats.histo_hypercalls, NHYPERCALLS); | ||
276 | xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug, | ||
277 | mc_stats.flush, FL_N_REASONS); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | fs_initcall(xen_mc_debugfs); | ||
282 | |||
283 | #endif /* CONFIG_XEN_DEBUG_FS */ | ||
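The reworked xen_mc_flush() dispatches on the batch size: zero queued entries is a no-op, a single entry skips the multicall machinery and issues the hypercall directly via privcmd_call(), and only larger batches pay for a real multicall. A minimal sketch of that dispatch shape; the submit functions are placeholders, not hypercall bindings:

#include <stdio.h>

struct call { int op; long args[5]; long result; };

static long submit_one(struct call *c)
{
	/* stand-in for privcmd_call(): issue the call directly */
	printf("direct call: op %d\n", c->op);
	return 0;
}

static long submit_batch(struct call *calls, int n)
{
	/* stand-in for the real multicall hypercall */
	printf("multicall: %d entries\n", n);
	return 0;
}

static int flush(struct call *calls, int n)
{
	int ret = 0;

	switch (n) {
	case 0:
		break;					/* nothing queued */
	case 1:
		calls[0].result = submit_one(&calls[0]);
		ret = calls[0].result < 0;
		break;
	default:
		ret = submit_batch(calls, n) != 0;
		break;
	}
	return ret;
}

int main(void)
{
	struct call one = { .op = 3 };
	struct call many[2] = { { .op = 1 }, { .op = 2 } };

	flush(NULL, 0);
	flush(&one, 1);
	flush(many, 2);
	return 0;
}

With the new tracepoints carrying the same information, the hand-rolled CONFIG_XEN_DEBUG_FS statistics became redundant, which is why the whole debugfs block is removed.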
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 4ec8035e3216..dee79b78a90f 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _XEN_MULTICALLS_H | 1 | #ifndef _XEN_MULTICALLS_H |
2 | #define _XEN_MULTICALLS_H | 2 | #define _XEN_MULTICALLS_H |
3 | 3 | ||
4 | #include <trace/events/xen.h> | ||
5 | |||
4 | #include "xen-ops.h" | 6 | #include "xen-ops.h" |
5 | 7 | ||
6 | /* Multicalls */ | 8 | /* Multicalls */ |
@@ -20,8 +22,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); | |||
20 | static inline void xen_mc_batch(void) | 22 | static inline void xen_mc_batch(void) |
21 | { | 23 | { |
22 | unsigned long flags; | 24 | unsigned long flags; |
25 | |||
23 | /* need to disable interrupts until this entry is complete */ | 26 | /* need to disable interrupts until this entry is complete */ |
24 | local_irq_save(flags); | 27 | local_irq_save(flags); |
28 | trace_xen_mc_batch(paravirt_get_lazy_mode()); | ||
25 | __this_cpu_write(xen_mc_irq_flags, flags); | 29 | __this_cpu_write(xen_mc_irq_flags, flags); |
26 | } | 30 | } |
27 | 31 | ||
@@ -37,6 +41,8 @@ void xen_mc_flush(void); | |||
37 | /* Issue a multicall if we're not in a lazy mode */ | 41 | /* Issue a multicall if we're not in a lazy mode */ |
38 | static inline void xen_mc_issue(unsigned mode) | 42 | static inline void xen_mc_issue(unsigned mode) |
39 | { | 43 | { |
44 | trace_xen_mc_issue(mode); | ||
45 | |||
40 | if ((paravirt_get_lazy_mode() & mode) == 0) | 46 | if ((paravirt_get_lazy_mode() & mode) == 0) |
41 | xen_mc_flush(); | 47 | xen_mc_flush(); |
42 | 48 | ||
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 25c52f94a27c..ffcf2615640b 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | |||
35 | #ifdef CONFIG_XEN_PVHVM | 35 | #ifdef CONFIG_XEN_PVHVM |
36 | static int xen_emul_unplug; | 36 | static int xen_emul_unplug; |
37 | 37 | ||
38 | static int __init check_platform_magic(void) | 38 | static int check_platform_magic(void) |
39 | { | 39 | { |
40 | short magic; | 40 | short magic; |
41 | char protocol; | 41 | char protocol; |
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c new file mode 100644 index 000000000000..734beba2a08c --- /dev/null +++ b/arch/x86/xen/trace.c | |||
@@ -0,0 +1,61 @@ | |||
1 | #include <linux/ftrace.h> | ||
2 | |||
3 | #define N(x) [__HYPERVISOR_##x] = "("#x")" | ||
4 | static const char *xen_hypercall_names[] = { | ||
5 | N(set_trap_table), | ||
6 | N(mmu_update), | ||
7 | N(set_gdt), | ||
8 | N(stack_switch), | ||
9 | N(set_callbacks), | ||
10 | N(fpu_taskswitch), | ||
11 | N(sched_op_compat), | ||
12 | N(dom0_op), | ||
13 | N(set_debugreg), | ||
14 | N(get_debugreg), | ||
15 | N(update_descriptor), | ||
16 | N(memory_op), | ||
17 | N(multicall), | ||
18 | N(update_va_mapping), | ||
19 | N(set_timer_op), | ||
20 | N(event_channel_op_compat), | ||
21 | N(xen_version), | ||
22 | N(console_io), | ||
23 | N(physdev_op_compat), | ||
24 | N(grant_table_op), | ||
25 | N(vm_assist), | ||
26 | N(update_va_mapping_otherdomain), | ||
27 | N(iret), | ||
28 | N(vcpu_op), | ||
29 | N(set_segment_base), | ||
30 | N(mmuext_op), | ||
31 | N(acm_op), | ||
32 | N(nmi_op), | ||
33 | N(sched_op), | ||
34 | N(callback_op), | ||
35 | N(xenoprof_op), | ||
36 | N(event_channel_op), | ||
37 | N(physdev_op), | ||
38 | N(hvm_op), | ||
39 | |||
40 | /* Architecture-specific hypercall definitions. */ | ||
41 | N(arch_0), | ||
42 | N(arch_1), | ||
43 | N(arch_2), | ||
44 | N(arch_3), | ||
45 | N(arch_4), | ||
46 | N(arch_5), | ||
47 | N(arch_6), | ||
48 | N(arch_7), | ||
49 | }; | ||
50 | #undef N | ||
51 | |||
52 | static const char *xen_hypercall_name(unsigned op) | ||
53 | { | ||
54 | if (op < ARRAY_SIZE(xen_hypercall_names) && xen_hypercall_names[op] != NULL) | ||
55 | return xen_hypercall_names[op]; | ||
56 | |||
57 | return ""; | ||
58 | } | ||
59 | |||
60 | #define CREATE_TRACE_POINTS | ||
61 | #include <trace/events/xen.h> | ||
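trace.c builds its hypercall-name table with designated initializers keyed by the __HYPERVISOR_* numbers, so any gaps in the numbering stay NULL and the lookup falls back to an empty string. The same idiom in a standalone form, with invented opcode numbers:

#include <stdio.h>

enum { OP_READ = 0, OP_WRITE = 1, OP_SYNC = 4, OP_MAX = 8 };

#define N(x) [OP_##x] = "(" #x ")"
static const char *op_names[OP_MAX] = {
	N(READ),
	N(WRITE),
	N(SYNC),	/* ops 2 and 3 are intentionally left NULL */
};
#undef N

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *op_name(unsigned op)
{
	if (op < ARRAY_SIZE(op_names) && op_names[op] != NULL)
		return op_names[op];
	return "";
}

int main(void)
{
	for (unsigned op = 0; op < 6; op++)
		printf("op %u -> \"%s\"\n", op, op_name(op));
	return 0;
}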
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c new file mode 100644 index 000000000000..1cd7f4d11e29 --- /dev/null +++ b/arch/x86/xen/vga.c | |||
@@ -0,0 +1,67 @@ | |||
1 | #include <linux/screen_info.h> | ||
2 | #include <linux/init.h> | ||
3 | |||
4 | #include <asm/bootparam.h> | ||
5 | #include <asm/setup.h> | ||
6 | |||
7 | #include <xen/interface/xen.h> | ||
8 | |||
9 | #include "xen-ops.h" | ||
10 | |||
11 | void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) | ||
12 | { | ||
13 | struct screen_info *screen_info = &boot_params.screen_info; | ||
14 | |||
15 | /* This is drawn from a dump from vgacon:startup in | ||
16 | * standard Linux. */ | ||
17 | screen_info->orig_video_mode = 3; | ||
18 | screen_info->orig_video_isVGA = 1; | ||
19 | screen_info->orig_video_lines = 25; | ||
20 | screen_info->orig_video_cols = 80; | ||
21 | screen_info->orig_video_ega_bx = 3; | ||
22 | screen_info->orig_video_points = 16; | ||
23 | screen_info->orig_y = screen_info->orig_video_lines - 1; | ||
24 | |||
25 | switch (info->video_type) { | ||
26 | case XEN_VGATYPE_TEXT_MODE_3: | ||
27 | if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3) | ||
28 | + sizeof(info->u.text_mode_3)) | ||
29 | break; | ||
30 | screen_info->orig_video_lines = info->u.text_mode_3.rows; | ||
31 | screen_info->orig_video_cols = info->u.text_mode_3.columns; | ||
32 | screen_info->orig_x = info->u.text_mode_3.cursor_x; | ||
33 | screen_info->orig_y = info->u.text_mode_3.cursor_y; | ||
34 | screen_info->orig_video_points = | ||
35 | info->u.text_mode_3.font_height; | ||
36 | break; | ||
37 | |||
38 | case XEN_VGATYPE_VESA_LFB: | ||
39 | if (size < offsetof(struct dom0_vga_console_info, | ||
40 | u.vesa_lfb.gbl_caps)) | ||
41 | break; | ||
42 | screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB; | ||
43 | screen_info->lfb_width = info->u.vesa_lfb.width; | ||
44 | screen_info->lfb_height = info->u.vesa_lfb.height; | ||
45 | screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel; | ||
46 | screen_info->lfb_base = info->u.vesa_lfb.lfb_base; | ||
47 | screen_info->lfb_size = info->u.vesa_lfb.lfb_size; | ||
48 | screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line; | ||
49 | screen_info->red_size = info->u.vesa_lfb.red_size; | ||
50 | screen_info->red_pos = info->u.vesa_lfb.red_pos; | ||
51 | screen_info->green_size = info->u.vesa_lfb.green_size; | ||
52 | screen_info->green_pos = info->u.vesa_lfb.green_pos; | ||
53 | screen_info->blue_size = info->u.vesa_lfb.blue_size; | ||
54 | screen_info->blue_pos = info->u.vesa_lfb.blue_pos; | ||
55 | screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; | ||
56 | screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; | ||
57 | if (size >= offsetof(struct dom0_vga_console_info, | ||
58 | u.vesa_lfb.gbl_caps) | ||
59 | + sizeof(info->u.vesa_lfb.gbl_caps)) | ||
60 | screen_info->capabilities = info->u.vesa_lfb.gbl_caps; | ||
61 | if (size >= offsetof(struct dom0_vga_console_info, | ||
62 | u.vesa_lfb.mode_attrs) | ||
63 | + sizeof(info->u.vesa_lfb.mode_attrs)) | ||
64 | screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; | ||
65 | break; | ||
66 | } | ||
67 | } | ||
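xen_init_vga() treats dom0_vga_console_info as a versioned, variable-size structure: a field is only read if it lies entirely within the size the hypervisor reported, checked with offsetof plus sizeof. A small sketch of that guard pattern; the struct and its fields are invented for illustration:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct console_info {
	int rows;
	int cols;
	int font_height;	/* added in a later ABI revision */
};

/* True if 'field' of struct console_info fits entirely within 'size'. */
#define FIELD_PRESENT(size, field) \
	((size) >= offsetof(struct console_info, field) + \
		   sizeof(((struct console_info *)0)->field))

static void parse(const void *blob, size_t size)
{
	struct console_info info = { 0 };

	/* copy only the bytes the other side actually provided */
	memcpy(&info, blob, size < sizeof(info) ? size : sizeof(info));

	printf("rows=%d cols=%d", info.rows, info.cols);
	if (FIELD_PRESENT(size, font_height))
		printf(" font_height=%d", info.font_height);
	printf("\n");
}

int main(void)
{
	struct console_info full = { 25, 80, 16 };

	parse(&full, sizeof(full));				/* new-style blob */
	parse(&full, offsetof(struct console_info, font_height));	/* old blob */
	return 0;
}

The caller added in xen_start_kernel() locates the blob at xen_start_info plus console.dom0.info_off and passes console.dom0.info_size as that length, so older hypervisors that provide a shorter struct still parse cleanly.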
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 97dfdc8757b3..b095739ccd4c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu) | |||
88 | } | 88 | } |
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | struct dom0_vga_console_info; | ||
92 | |||
93 | #ifdef CONFIG_XEN_DOM0 | ||
94 | void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); | ||
95 | #else | ||
96 | static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, | ||
97 | size_t size) | ||
98 | { | ||
99 | } | ||
100 | #endif | ||
101 | |||
91 | /* Declare an asm function, along with symbols needed to make it | 102 | /* Declare an asm function, along with symbols needed to make it |
92 | inlineable */ | 103 | inlineable */ |
93 | #define DECL_ASM(ret, name, ...) \ | 104 | #define DECL_ASM(ret, name, ...) \ |