aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-04-23 05:10:28 -0400
committerIngo Molnar <mingo@elte.hu>2010-04-23 05:10:30 -0400
commit70bce3ba77540ebe77b8c0e1ac38d281a23fbb5e (patch)
tree34b09a49228f0949ff49dce66a433b0dfd83a2dc /arch/x86
parent6eca8cc35b50af1037bc919106dd6dd332c959c2 (diff)
parentd5a30458a90597915977f06e79406b664a41b8ac (diff)
Merge branch 'linus' into perf/core
Merge reason: merge the latest fixes, update to latest -rc. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h3
-rw-r--r--arch/x86/include/asm/lguest_hcall.h29
-rw-r--r--arch/x86/kernel/amd_iommu.c20
-rw-r--r--arch/x86/kernel/amd_iommu_init.c48
-rw-r--r--arch/x86/kernel/aperture_64.c15
-rw-r--r--arch/x86/kernel/crash.c6
-rw-r--r--arch/x86/kernel/dumpstack.h8
-rw-r--r--arch/x86/kernel/pci-gart_64.c3
-rw-r--r--arch/x86/kvm/mmu.c11
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c24
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--arch/x86/lguest/boot.c61
-rw-r--r--arch/x86/lguest/i386_head.S2
15 files changed, 181 insertions, 124 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 59b4556a5b92..e790bc1fbfa3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -626,7 +626,7 @@ ia32_sys_call_table:
626 .quad stub32_sigreturn 626 .quad stub32_sigreturn
627 .quad stub32_clone /* 120 */ 627 .quad stub32_clone /* 120 */
628 .quad sys_setdomainname 628 .quad sys_setdomainname
629 .quad sys_uname 629 .quad sys_newuname
630 .quad sys_modify_ldt 630 .quad sys_modify_ldt
631 .quad compat_sys_adjtimex 631 .quad compat_sys_adjtimex
632 .quad sys32_mprotect /* 125 */ 632 .quad sys32_mprotect /* 125 */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ba19ad4c47d0..86a0ff0aeac7 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -21,6 +21,7 @@
21#define _ASM_X86_AMD_IOMMU_TYPES_H 21#define _ASM_X86_AMD_IOMMU_TYPES_H
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/mutex.h>
24#include <linux/list.h> 25#include <linux/list.h>
25#include <linux/spinlock.h> 26#include <linux/spinlock.h>
26 27
@@ -140,6 +141,7 @@
140 141
141/* constants to configure the command buffer */ 142/* constants to configure the command buffer */
142#define CMD_BUFFER_SIZE 8192 143#define CMD_BUFFER_SIZE 8192
144#define CMD_BUFFER_UNINITIALIZED 1
143#define CMD_BUFFER_ENTRIES 512 145#define CMD_BUFFER_ENTRIES 512
144#define MMIO_CMD_SIZE_SHIFT 56 146#define MMIO_CMD_SIZE_SHIFT 56
145#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) 147#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
@@ -237,6 +239,7 @@ struct protection_domain {
237 struct list_head list; /* for list of all protection domains */ 239 struct list_head list; /* for list of all protection domains */
238 struct list_head dev_list; /* List of all devices in this domain */ 240 struct list_head dev_list; /* List of all devices in this domain */
239 spinlock_t lock; /* mostly used to lock the page table*/ 241 spinlock_t lock; /* mostly used to lock the page table*/
242 struct mutex api_lock; /* protect page tables in the iommu-api path */
240 u16 id; /* the domain id written to the device table */ 243 u16 id; /* the domain id written to the device table */
241 int mode; /* paging mode (0-6 levels) */ 244 int mode; /* paging mode (0-6 levels) */
242 u64 *pt_root; /* page table root pointer */ 245 u64 *pt_root; /* page table root pointer */
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ba0eed8aa1a6..b60f2924c413 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -28,22 +28,39 @@
28 28
29#ifndef __ASSEMBLY__ 29#ifndef __ASSEMBLY__
30#include <asm/hw_irq.h> 30#include <asm/hw_irq.h>
31#include <asm/kvm_para.h>
32 31
33/*G:030 32/*G:030
34 * But first, how does our Guest contact the Host to ask for privileged 33 * But first, how does our Guest contact the Host to ask for privileged
35 * operations? There are two ways: the direct way is to make a "hypercall", 34 * operations? There are two ways: the direct way is to make a "hypercall",
36 * to make requests of the Host Itself. 35 * to make requests of the Host Itself.
37 * 36 *
38 * We use the KVM hypercall mechanism, though completely different hypercall 37 * Our hypercall mechanism uses the highest unused trap code (traps 32 and
39 * numbers. Seventeen hypercalls are available: the hypercall number is put in 38 * above are used by real hardware interrupts). Seventeen hypercalls are
40 * the %eax register, and the arguments (when required) are placed in %ebx, 39 * available: the hypercall number is put in the %eax register, and the
41 * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. 40 * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
41 * If a return value makes sense, it's returned in %eax.
42 * 42 *
43 * Grossly invalid calls result in Sudden Death at the hands of the vengeful 43 * Grossly invalid calls result in Sudden Death at the hands of the vengeful
44 * Host, rather than returning failure. This reflects Winston Churchill's 44 * Host, rather than returning failure. This reflects Winston Churchill's
45 * definition of a gentleman: "someone who is only rude intentionally". 45 * definition of a gentleman: "someone who is only rude intentionally".
46:*/ 46 */
47static inline unsigned long
48hcall(unsigned long call,
49 unsigned long arg1, unsigned long arg2, unsigned long arg3,
50 unsigned long arg4)
51{
52 /* "int" is the Intel instruction to trigger a trap. */
53 asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
54 /* The call in %eax (aka "a") might be overwritten */
55 : "=a"(call)
56 /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
57 : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
58 /* "memory" means this might write somewhere in memory.
59 * This isn't true for all calls, but it's safe to tell
60 * gcc that it might happen so it doesn't get clever. */
61 : "memory");
62 return call;
63}
47 64
48/* Can't use our min() macro here: needs to be a constant */ 65/* Can't use our min() macro here: needs to be a constant */
49#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) 66#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f3dadb571d9b..f854d89b7edf 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -118,7 +118,7 @@ static bool check_device(struct device *dev)
118 return false; 118 return false;
119 119
120 /* No device or no PCI device */ 120 /* No device or no PCI device */
121 if (!dev || dev->bus != &pci_bus_type) 121 if (dev->bus != &pci_bus_type)
122 return false; 122 return false;
123 123
124 devid = get_device_id(dev); 124 devid = get_device_id(dev);
@@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
392 u32 tail, head; 392 u32 tail, head;
393 u8 *target; 393 u8 *target;
394 394
395 WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
395 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 396 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
396 target = iommu->cmd_buf + tail; 397 target = iommu->cmd_buf + tail;
397 memcpy_toio(target, cmd, sizeof(*cmd)); 398 memcpy_toio(target, cmd, sizeof(*cmd));
@@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void)
2186 struct dma_ops_domain *dma_dom; 2187 struct dma_ops_domain *dma_dom;
2187 u16 devid; 2188 u16 devid;
2188 2189
2189 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2190 for_each_pci_dev(dev) {
2190 2191
2191 /* Do we handle this device? */ 2192 /* Do we handle this device? */
2192 if (!check_device(&dev->dev)) 2193 if (!check_device(&dev->dev))
@@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain)
2298 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { 2299 list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
2299 struct device *dev = dev_data->dev; 2300 struct device *dev = dev_data->dev;
2300 2301
2301 do_detach(dev); 2302 __detach_device(dev);
2302 atomic_set(&dev_data->bind, 0); 2303 atomic_set(&dev_data->bind, 0);
2303 } 2304 }
2304 2305
@@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void)
2327 return NULL; 2328 return NULL;
2328 2329
2329 spin_lock_init(&domain->lock); 2330 spin_lock_init(&domain->lock);
2331 mutex_init(&domain->api_lock);
2330 domain->id = domain_id_alloc(); 2332 domain->id = domain_id_alloc();
2331 if (!domain->id) 2333 if (!domain->id)
2332 goto out_err; 2334 goto out_err;
@@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2379 2381
2380 free_pagetable(domain); 2382 free_pagetable(domain);
2381 2383
2382 domain_id_free(domain->id); 2384 protection_domain_free(domain);
2383
2384 kfree(domain);
2385 2385
2386 dom->priv = NULL; 2386 dom->priv = NULL;
2387} 2387}
@@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
2456 iova &= PAGE_MASK; 2456 iova &= PAGE_MASK;
2457 paddr &= PAGE_MASK; 2457 paddr &= PAGE_MASK;
2458 2458
2459 mutex_lock(&domain->api_lock);
2460
2459 for (i = 0; i < npages; ++i) { 2461 for (i = 0; i < npages; ++i) {
2460 ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); 2462 ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
2461 if (ret) 2463 if (ret)
@@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
2465 paddr += PAGE_SIZE; 2467 paddr += PAGE_SIZE;
2466 } 2468 }
2467 2469
2470 mutex_unlock(&domain->api_lock);
2471
2468 return 0; 2472 return 0;
2469} 2473}
2470 2474
@@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
2477 2481
2478 iova &= PAGE_MASK; 2482 iova &= PAGE_MASK;
2479 2483
2484 mutex_lock(&domain->api_lock);
2485
2480 for (i = 0; i < npages; ++i) { 2486 for (i = 0; i < npages; ++i) {
2481 iommu_unmap_page(domain, iova, PM_MAP_4k); 2487 iommu_unmap_page(domain, iova, PM_MAP_4k);
2482 iova += PAGE_SIZE; 2488 iova += PAGE_SIZE;
2483 } 2489 }
2484 2490
2485 iommu_flush_tlb_pde(domain); 2491 iommu_flush_tlb_pde(domain);
2492
2493 mutex_unlock(&domain->api_lock);
2486} 2494}
2487 2495
2488static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, 2496static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 42f5350b908f..6360abf993d4 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -138,9 +138,9 @@ int amd_iommus_present;
138bool amd_iommu_np_cache __read_mostly; 138bool amd_iommu_np_cache __read_mostly;
139 139
140/* 140/*
141 * Set to true if ACPI table parsing and hardware intialization went properly 141 * The ACPI table parsing functions set this variable on an error
142 */ 142 */
143static bool amd_iommu_initialized; 143static int __initdata amd_iommu_init_err;
144 144
145/* 145/*
146 * List of protection domains - used during resume 146 * List of protection domains - used during resume
@@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
391 */ 391 */
392 for (i = 0; i < table->length; ++i) 392 for (i = 0; i < table->length; ++i)
393 checksum += p[i]; 393 checksum += p[i];
394 if (checksum != 0) 394 if (checksum != 0) {
395 /* ACPI table corrupt */ 395 /* ACPI table corrupt */
396 return -ENODEV; 396 amd_iommu_init_err = -ENODEV;
397 return 0;
398 }
397 399
398 p += IVRS_HEADER_LENGTH; 400 p += IVRS_HEADER_LENGTH;
399 401
@@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
436 if (cmd_buf == NULL) 438 if (cmd_buf == NULL)
437 return NULL; 439 return NULL;
438 440
439 iommu->cmd_buf_size = CMD_BUFFER_SIZE; 441 iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
440 442
441 return cmd_buf; 443 return cmd_buf;
442} 444}
@@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
472 &entry, sizeof(entry)); 474 &entry, sizeof(entry));
473 475
474 amd_iommu_reset_cmd_buffer(iommu); 476 amd_iommu_reset_cmd_buffer(iommu);
477 iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
475} 478}
476 479
477static void __init free_command_buffer(struct amd_iommu *iommu) 480static void __init free_command_buffer(struct amd_iommu *iommu)
478{ 481{
479 free_pages((unsigned long)iommu->cmd_buf, 482 free_pages((unsigned long)iommu->cmd_buf,
480 get_order(iommu->cmd_buf_size)); 483 get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
481} 484}
482 485
483/* allocates the memory where the IOMMU will log its events to */ 486/* allocates the memory where the IOMMU will log its events to */
@@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
920 h->mmio_phys); 923 h->mmio_phys);
921 924
922 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); 925 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
923 if (iommu == NULL) 926 if (iommu == NULL) {
924 return -ENOMEM; 927 amd_iommu_init_err = -ENOMEM;
928 return 0;
929 }
930
925 ret = init_iommu_one(iommu, h); 931 ret = init_iommu_one(iommu, h);
926 if (ret) 932 if (ret) {
927 return ret; 933 amd_iommu_init_err = ret;
934 return 0;
935 }
928 break; 936 break;
929 default: 937 default:
930 break; 938 break;
@@ -934,8 +942,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
934 } 942 }
935 WARN_ON(p != end); 943 WARN_ON(p != end);
936 944
937 amd_iommu_initialized = true;
938
939 return 0; 945 return 0;
940} 946}
941 947
@@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void)
1211 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) 1217 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
1212 return -ENODEV; 1218 return -ENODEV;
1213 1219
1220 ret = amd_iommu_init_err;
1221 if (ret)
1222 goto out;
1223
1214 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); 1224 dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE);
1215 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); 1225 alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1216 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); 1226 rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
@@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void)
1270 if (acpi_table_parse("IVRS", init_iommu_all) != 0) 1280 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
1271 goto free; 1281 goto free;
1272 1282
1273 if (!amd_iommu_initialized) 1283 if (amd_iommu_init_err) {
1284 ret = amd_iommu_init_err;
1274 goto free; 1285 goto free;
1286 }
1275 1287
1276 if (acpi_table_parse("IVRS", init_memory_definitions) != 0) 1288 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
1277 goto free; 1289 goto free;
1278 1290
1291 if (amd_iommu_init_err) {
1292 ret = amd_iommu_init_err;
1293 goto free;
1294 }
1295
1279 ret = sysdev_class_register(&amd_iommu_sysdev_class); 1296 ret = sysdev_class_register(&amd_iommu_sysdev_class);
1280 if (ret) 1297 if (ret)
1281 goto free; 1298 goto free;
@@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void)
1288 if (ret) 1305 if (ret)
1289 goto free; 1306 goto free;
1290 1307
1308 enable_iommus();
1309
1291 if (iommu_pass_through) 1310 if (iommu_pass_through)
1292 ret = amd_iommu_init_passthrough(); 1311 ret = amd_iommu_init_passthrough();
1293 else 1312 else
@@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void)
1300 1319
1301 amd_iommu_init_notifier(); 1320 amd_iommu_init_notifier();
1302 1321
1303 enable_iommus();
1304
1305 if (iommu_pass_through) 1322 if (iommu_pass_through)
1306 goto out; 1323 goto out;
1307 1324
@@ -1315,6 +1332,7 @@ out:
1315 return ret; 1332 return ret;
1316 1333
1317free: 1334free:
1335 disable_iommus();
1318 1336
1319 amd_iommu_uninit_devices(); 1337 amd_iommu_uninit_devices();
1320 1338
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3704997e8b25..b5d8b0bcf235 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void)
393 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 393 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
394 int bus; 394 int bus;
395 int dev_base, dev_limit; 395 int dev_base, dev_limit;
396 u32 ctl;
396 397
397 bus = bus_dev_ranges[i].bus; 398 bus = bus_dev_ranges[i].bus;
398 dev_base = bus_dev_ranges[i].dev_base; 399 dev_base = bus_dev_ranges[i].dev_base;
@@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void)
406 gart_iommu_aperture = 1; 407 gart_iommu_aperture = 1;
407 x86_init.iommu.iommu_init = gart_iommu_init; 408 x86_init.iommu.iommu_init = gart_iommu_init;
408 409
409 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; 410 ctl = read_pci_config(bus, slot, 3,
411 AMD64_GARTAPERTURECTL);
412
413 /*
414 * Before we do anything else disable the GART. It may
415 * still be enabled if we boot into a crash-kernel here.
416 * Reconfiguring the GART while it is enabled could have
417 * unknown side-effects.
418 */
419 ctl &= ~GARTEN;
420 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
421
422 aper_order = (ctl >> 1) & 7;
410 aper_size = (32 * 1024 * 1024) << aper_order; 423 aper_size = (32 * 1024 * 1024) << aper_order;
411 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; 424 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
412 aper_base <<= 25; 425 aper_base <<= 25;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a4849c10a77e..ebd4c51d096a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,7 +27,6 @@
27#include <asm/cpu.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30#include <asm/x86_init.h>
31 30
32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 31#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
33 32
@@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
103#ifdef CONFIG_HPET_TIMER 102#ifdef CONFIG_HPET_TIMER
104 hpet_disable(); 103 hpet_disable();
105#endif 104#endif
106
107#ifdef CONFIG_X86_64
108 x86_platform.iommu_shutdown();
109#endif
110
111 crash_save_cpu(regs, safe_smp_processor_id()); 105 crash_save_cpu(regs, safe_smp_processor_id());
112} 106}
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index e39e77168a37..e1a93be4fd44 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,6 +14,8 @@
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif 15#endif
16 16
17#include <linux/uaccess.h>
18
17extern void 19extern void
18show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
19 unsigned long *stack, unsigned long bp, char *log_lvl); 21 unsigned long *stack, unsigned long bp, char *log_lvl);
@@ -42,8 +44,10 @@ static inline unsigned long rewind_frame_pointer(int n)
42 get_bp(frame); 44 get_bp(frame);
43 45
44#ifdef CONFIG_FRAME_POINTER 46#ifdef CONFIG_FRAME_POINTER
45 while (n--) 47 while (n--) {
46 frame = frame->next_frame; 48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
47#endif 51#endif
48 52
49 return (unsigned long)frame; 53 return (unsigned long)frame;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 68cd24f9deae..0f7f130caa67 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -565,6 +565,9 @@ static void enable_gart_translations(void)
565 565
566 enable_gart_translation(dev, __pa(agp_gatt_table)); 566 enable_gart_translation(dev, __pa(agp_gatt_table));
567 } 567 }
568
569 /* Flush the GART-TLB to remove stale entries */
570 k8_flush_garts();
568} 571}
569 572
570/* 573/*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 48aeee8eefb0..19a8906bcaa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1490,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
1490 for_each_sp(pages, sp, parents, i) { 1490 for_each_sp(pages, sp, parents, i) {
1491 kvm_mmu_zap_page(kvm, sp); 1491 kvm_mmu_zap_page(kvm, sp);
1492 mmu_pages_clear_parents(&parents); 1492 mmu_pages_clear_parents(&parents);
1493 zapped++;
1493 } 1494 }
1494 zapped += pages.nr;
1495 kvm_mmu_pages_init(parent, &parents, &pages); 1495 kvm_mmu_pages_init(parent, &parents, &pages);
1496 } 1496 }
1497 1497
@@ -1542,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1542 */ 1542 */
1543 1543
1544 if (used_pages > kvm_nr_mmu_pages) { 1544 if (used_pages > kvm_nr_mmu_pages) {
1545 while (used_pages > kvm_nr_mmu_pages) { 1545 while (used_pages > kvm_nr_mmu_pages &&
1546 !list_empty(&kvm->arch.active_mmu_pages)) {
1546 struct kvm_mmu_page *page; 1547 struct kvm_mmu_page *page;
1547 1548
1548 page = container_of(kvm->arch.active_mmu_pages.prev, 1549 page = container_of(kvm->arch.active_mmu_pages.prev,
1549 struct kvm_mmu_page, link); 1550 struct kvm_mmu_page, link);
1550 kvm_mmu_zap_page(kvm, page); 1551 used_pages -= kvm_mmu_zap_page(kvm, page);
1551 used_pages--; 1552 used_pages--;
1552 } 1553 }
1554 kvm_nr_mmu_pages = used_pages;
1553 kvm->arch.n_free_mmu_pages = 0; 1555 kvm->arch.n_free_mmu_pages = 0;
1554 } 1556 }
1555 else 1557 else
@@ -1596,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1596 && !sp->role.invalid) { 1598 && !sp->role.invalid) {
1597 pgprintk("%s: zap %lx %x\n", 1599 pgprintk("%s: zap %lx %x\n",
1598 __func__, gfn, sp->role.word); 1600 __func__, gfn, sp->role.word);
1599 kvm_mmu_zap_page(kvm, sp); 1601 if (kvm_mmu_zap_page(kvm, sp))
1602 nn = bucket->first;
1600 } 1603 }
1601 } 1604 }
1602} 1605}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 445c59411ed0..2ba58206812a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -706,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
706 if (err) 706 if (err)
707 goto free_svm; 707 goto free_svm;
708 708
709 err = -ENOMEM;
709 page = alloc_page(GFP_KERNEL); 710 page = alloc_page(GFP_KERNEL);
710 if (!page) { 711 if (!page)
711 err = -ENOMEM;
712 goto uninit; 712 goto uninit;
713 }
714 713
715 err = -ENOMEM;
716 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 714 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
717 if (!msrpm_pages) 715 if (!msrpm_pages)
718 goto uninit; 716 goto free_page1;
719 717
720 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 718 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
721 if (!nested_msrpm_pages) 719 if (!nested_msrpm_pages)
722 goto uninit; 720 goto free_page2;
723
724 svm->msrpm = page_address(msrpm_pages);
725 svm_vcpu_init_msrpm(svm->msrpm);
726 721
727 hsave_page = alloc_page(GFP_KERNEL); 722 hsave_page = alloc_page(GFP_KERNEL);
728 if (!hsave_page) 723 if (!hsave_page)
729 goto uninit; 724 goto free_page3;
725
730 svm->nested.hsave = page_address(hsave_page); 726 svm->nested.hsave = page_address(hsave_page);
731 727
728 svm->msrpm = page_address(msrpm_pages);
729 svm_vcpu_init_msrpm(svm->msrpm);
730
732 svm->nested.msrpm = page_address(nested_msrpm_pages); 731 svm->nested.msrpm = page_address(nested_msrpm_pages);
733 732
734 svm->vmcb = page_address(page); 733 svm->vmcb = page_address(page);
@@ -744,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
744 743
745 return &svm->vcpu; 744 return &svm->vcpu;
746 745
746free_page3:
747 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
748free_page2:
749 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
750free_page1:
751 __free_page(page);
747uninit: 752uninit:
748 kvm_vcpu_uninit(&svm->vcpu); 753 kvm_vcpu_uninit(&svm->vcpu);
749free_svm: 754free_svm:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 82be6dac3d25..32022a8a5c3b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
79 79
80#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
81
80/* 82/*
81 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 83 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
82 * ple_gap: upper bound on the amount of time between two successive 84 * ple_gap: upper bound on the amount of time between two successive
@@ -131,7 +133,7 @@ struct vcpu_vmx {
131 } host_state; 133 } host_state;
132 struct { 134 struct {
133 int vm86_active; 135 int vm86_active;
134 u8 save_iopl; 136 ulong save_rflags;
135 struct kvm_save_segment { 137 struct kvm_save_segment {
136 u16 selector; 138 u16 selector;
137 unsigned long base; 139 unsigned long base;
@@ -818,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
818 820
819static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 821static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
820{ 822{
821 unsigned long rflags; 823 unsigned long rflags, save_rflags;
822 824
823 rflags = vmcs_readl(GUEST_RFLAGS); 825 rflags = vmcs_readl(GUEST_RFLAGS);
824 if (to_vmx(vcpu)->rmode.vm86_active) 826 if (to_vmx(vcpu)->rmode.vm86_active) {
825 rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 827 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
828 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
829 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
830 }
826 return rflags; 831 return rflags;
827} 832}
828 833
829static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 834static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
830{ 835{
831 if (to_vmx(vcpu)->rmode.vm86_active) 836 if (to_vmx(vcpu)->rmode.vm86_active) {
837 to_vmx(vcpu)->rmode.save_rflags = rflags;
832 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 838 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
839 }
833 vmcs_writel(GUEST_RFLAGS, rflags); 840 vmcs_writel(GUEST_RFLAGS, rflags);
834} 841}
835 842
@@ -1483,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1483 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); 1490 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1484 1491
1485 flags = vmcs_readl(GUEST_RFLAGS); 1492 flags = vmcs_readl(GUEST_RFLAGS);
1486 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 1493 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1487 flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); 1494 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1488 vmcs_writel(GUEST_RFLAGS, flags); 1495 vmcs_writel(GUEST_RFLAGS, flags);
1489 1496
1490 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 1497 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1557,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1557 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 1564 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1558 1565
1559 flags = vmcs_readl(GUEST_RFLAGS); 1566 flags = vmcs_readl(GUEST_RFLAGS);
1560 vmx->rmode.save_iopl 1567 vmx->rmode.save_rflags = flags;
1561 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1562 1568
1563 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1569 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1564 1570
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21b9b6aa3e88..73d854c36e39 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -434,8 +434,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
434 434
435#ifdef CONFIG_X86_64 435#ifdef CONFIG_X86_64
436 if (cr0 & 0xffffffff00000000UL) { 436 if (cr0 & 0xffffffff00000000UL) {
437 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
438 cr0, kvm_read_cr0(vcpu));
439 kvm_inject_gp(vcpu, 0); 437 kvm_inject_gp(vcpu, 0);
440 return; 438 return;
441 } 439 }
@@ -444,14 +442,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
444 cr0 &= ~CR0_RESERVED_BITS; 442 cr0 &= ~CR0_RESERVED_BITS;
445 443
446 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 444 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
447 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
448 kvm_inject_gp(vcpu, 0); 445 kvm_inject_gp(vcpu, 0);
449 return; 446 return;
450 } 447 }
451 448
452 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 449 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
453 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
454 "and a clear PE flag\n");
455 kvm_inject_gp(vcpu, 0); 450 kvm_inject_gp(vcpu, 0);
456 return; 451 return;
457 } 452 }
@@ -462,15 +457,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
462 int cs_db, cs_l; 457 int cs_db, cs_l;
463 458
464 if (!is_pae(vcpu)) { 459 if (!is_pae(vcpu)) {
465 printk(KERN_DEBUG "set_cr0: #GP, start paging "
466 "in long mode while PAE is disabled\n");
467 kvm_inject_gp(vcpu, 0); 460 kvm_inject_gp(vcpu, 0);
468 return; 461 return;
469 } 462 }
470 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 463 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
471 if (cs_l) { 464 if (cs_l) {
472 printk(KERN_DEBUG "set_cr0: #GP, start paging "
473 "in long mode while CS.L == 1\n");
474 kvm_inject_gp(vcpu, 0); 465 kvm_inject_gp(vcpu, 0);
475 return; 466 return;
476 467
@@ -478,8 +469,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
478 } else 469 } else
479#endif 470#endif
480 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 471 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
481 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
482 "reserved bits\n");
483 kvm_inject_gp(vcpu, 0); 472 kvm_inject_gp(vcpu, 0);
484 return; 473 return;
485 } 474 }
@@ -506,28 +495,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
506 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 495 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
507 496
508 if (cr4 & CR4_RESERVED_BITS) { 497 if (cr4 & CR4_RESERVED_BITS) {
509 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
510 kvm_inject_gp(vcpu, 0); 498 kvm_inject_gp(vcpu, 0);
511 return; 499 return;
512 } 500 }
513 501
514 if (is_long_mode(vcpu)) { 502 if (is_long_mode(vcpu)) {
515 if (!(cr4 & X86_CR4_PAE)) { 503 if (!(cr4 & X86_CR4_PAE)) {
516 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
517 "in long mode\n");
518 kvm_inject_gp(vcpu, 0); 504 kvm_inject_gp(vcpu, 0);
519 return; 505 return;
520 } 506 }
521 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 507 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
522 && ((cr4 ^ old_cr4) & pdptr_bits) 508 && ((cr4 ^ old_cr4) & pdptr_bits)
523 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 509 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
524 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
525 kvm_inject_gp(vcpu, 0); 510 kvm_inject_gp(vcpu, 0);
526 return; 511 return;
527 } 512 }
528 513
529 if (cr4 & X86_CR4_VMXE) { 514 if (cr4 & X86_CR4_VMXE) {
530 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
531 kvm_inject_gp(vcpu, 0); 515 kvm_inject_gp(vcpu, 0);
532 return; 516 return;
533 } 517 }
@@ -548,21 +532,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
548 532
549 if (is_long_mode(vcpu)) { 533 if (is_long_mode(vcpu)) {
550 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 534 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
551 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
552 kvm_inject_gp(vcpu, 0); 535 kvm_inject_gp(vcpu, 0);
553 return; 536 return;
554 } 537 }
555 } else { 538 } else {
556 if (is_pae(vcpu)) { 539 if (is_pae(vcpu)) {
557 if (cr3 & CR3_PAE_RESERVED_BITS) { 540 if (cr3 & CR3_PAE_RESERVED_BITS) {
558 printk(KERN_DEBUG
559 "set_cr3: #GP, reserved bits\n");
560 kvm_inject_gp(vcpu, 0); 541 kvm_inject_gp(vcpu, 0);
561 return; 542 return;
562 } 543 }
563 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 544 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
564 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
565 "reserved bits\n");
566 kvm_inject_gp(vcpu, 0); 545 kvm_inject_gp(vcpu, 0);
567 return; 546 return;
568 } 547 }
@@ -594,7 +573,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
594void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 573void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
595{ 574{
596 if (cr8 & CR8_RESERVED_BITS) { 575 if (cr8 & CR8_RESERVED_BITS) {
597 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
598 kvm_inject_gp(vcpu, 0); 576 kvm_inject_gp(vcpu, 0);
599 return; 577 return;
600 } 578 }
@@ -650,15 +628,12 @@ static u32 emulated_msrs[] = {
650static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 628static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
651{ 629{
652 if (efer & efer_reserved_bits) { 630 if (efer & efer_reserved_bits) {
653 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
654 efer);
655 kvm_inject_gp(vcpu, 0); 631 kvm_inject_gp(vcpu, 0);
656 return; 632 return;
657 } 633 }
658 634
659 if (is_paging(vcpu) 635 if (is_paging(vcpu)
660 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 636 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
661 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
662 kvm_inject_gp(vcpu, 0); 637 kvm_inject_gp(vcpu, 0);
663 return; 638 return;
664 } 639 }
@@ -668,7 +643,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
668 643
669 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 644 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
670 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 645 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
671 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
672 kvm_inject_gp(vcpu, 0); 646 kvm_inject_gp(vcpu, 0);
673 return; 647 return;
674 } 648 }
@@ -679,7 +653,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
679 653
680 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 654 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
681 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 655 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
682 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
683 kvm_inject_gp(vcpu, 0); 656 kvm_inject_gp(vcpu, 0);
684 return; 657 return;
685 } 658 }
@@ -968,9 +941,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
968 if (msr >= MSR_IA32_MC0_CTL && 941 if (msr >= MSR_IA32_MC0_CTL &&
969 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 942 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
970 u32 offset = msr - MSR_IA32_MC0_CTL; 943 u32 offset = msr - MSR_IA32_MC0_CTL;
971 /* only 0 or all 1s can be written to IA32_MCi_CTL */ 944 /* only 0 or all 1s can be written to IA32_MCi_CTL
945 * some Linux kernels though clear bit 10 in bank 4 to
946 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
947 * this to avoid an uncatched #GP in the guest
948 */
972 if ((offset & 0x3) == 0 && 949 if ((offset & 0x3) == 0 &&
973 data != 0 && data != ~(u64)0) 950 data != 0 && (data | (1 << 10)) != ~(u64)0)
974 return -1; 951 return -1;
975 vcpu->arch.mce_banks[offset] = data; 952 vcpu->arch.mce_banks[offset] = data;
976 break; 953 break;
@@ -2636,8 +2613,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2636int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2613int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2637 struct kvm_dirty_log *log) 2614 struct kvm_dirty_log *log)
2638{ 2615{
2639 int r, n, i; 2616 int r, i;
2640 struct kvm_memory_slot *memslot; 2617 struct kvm_memory_slot *memslot;
2618 unsigned long n;
2641 unsigned long is_dirty = 0; 2619 unsigned long is_dirty = 0;
2642 unsigned long *dirty_bitmap = NULL; 2620 unsigned long *dirty_bitmap = NULL;
2643 2621
@@ -2652,7 +2630,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2652 if (!memslot->dirty_bitmap) 2630 if (!memslot->dirty_bitmap)
2653 goto out; 2631 goto out;
2654 2632
2655 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2633 n = kvm_dirty_bitmap_bytes(memslot);
2656 2634
2657 r = -ENOMEM; 2635 r = -ENOMEM;
2658 dirty_bitmap = vmalloc(n); 2636 dirty_bitmap = vmalloc(n);
@@ -4533,7 +4511,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4533 kvm_set_cr8(vcpu, kvm_run->cr8); 4511 kvm_set_cr8(vcpu, kvm_run->cr8);
4534 4512
4535 if (vcpu->arch.pio.cur_count) { 4513 if (vcpu->arch.pio.cur_count) {
4514 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4536 r = complete_pio(vcpu); 4515 r = complete_pio(vcpu);
4516 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4537 if (r) 4517 if (r)
4538 goto out; 4518 goto out;
4539 } 4519 }
@@ -5196,6 +5176,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5196 int ret = 0; 5176 int ret = 0;
5197 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5177 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5198 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5178 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5179 u32 desc_limit;
5199 5180
5200 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5181 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5201 5182
@@ -5218,7 +5199,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5218 } 5199 }
5219 } 5200 }
5220 5201
5221 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { 5202 desc_limit = get_desc_limit(&nseg_desc);
5203 if (!nseg_desc.p ||
5204 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5205 desc_limit < 0x2b)) {
5222 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5206 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5223 return 1; 5207 return 1;
5224 } 5208 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 7e59dc1d3fc2..2bdf628066bd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
115 local_irq_save(flags); 115 local_irq_save(flags);
116 if (lguest_data.hcall_status[next_call] != 0xFF) { 116 if (lguest_data.hcall_status[next_call] != 0xFF) {
117 /* Table full, so do normal hcall which will flush table. */ 117 /* Table full, so do normal hcall which will flush table. */
118 kvm_hypercall4(call, arg1, arg2, arg3, arg4); 118 hcall(call, arg1, arg2, arg3, arg4);
119 } else { 119 } else {
120 lguest_data.hcalls[next_call].arg0 = call; 120 lguest_data.hcalls[next_call].arg0 = call;
121 lguest_data.hcalls[next_call].arg1 = arg1; 121 lguest_data.hcalls[next_call].arg1 = arg1;
@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,
145 * So, when we're in lazy mode, we call async_hcall() to store the call for 145 * So, when we're in lazy mode, we call async_hcall() to store the call for
146 * future processing: 146 * future processing:
147 */ 147 */
148static void lazy_hcall1(unsigned long call, 148static void lazy_hcall1(unsigned long call, unsigned long arg1)
149 unsigned long arg1)
150{ 149{
151 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 150 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
152 kvm_hypercall1(call, arg1); 151 hcall(call, arg1, 0, 0, 0);
153 else 152 else
154 async_hcall(call, arg1, 0, 0, 0); 153 async_hcall(call, arg1, 0, 0, 0);
155} 154}
156 155
157/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ 156/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
158static void lazy_hcall2(unsigned long call, 157static void lazy_hcall2(unsigned long call,
159 unsigned long arg1, 158 unsigned long arg1,
160 unsigned long arg2) 159 unsigned long arg2)
161{ 160{
162 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 161 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
163 kvm_hypercall2(call, arg1, arg2); 162 hcall(call, arg1, arg2, 0, 0);
164 else 163 else
165 async_hcall(call, arg1, arg2, 0, 0); 164 async_hcall(call, arg1, arg2, 0, 0);
166} 165}
167 166
168static void lazy_hcall3(unsigned long call, 167static void lazy_hcall3(unsigned long call,
169 unsigned long arg1, 168 unsigned long arg1,
170 unsigned long arg2, 169 unsigned long arg2,
171 unsigned long arg3) 170 unsigned long arg3)
172{ 171{
173 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 172 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
174 kvm_hypercall3(call, arg1, arg2, arg3); 173 hcall(call, arg1, arg2, arg3, 0);
175 else 174 else
176 async_hcall(call, arg1, arg2, arg3, 0); 175 async_hcall(call, arg1, arg2, arg3, 0);
177} 176}
178 177
179#ifdef CONFIG_X86_PAE 178#ifdef CONFIG_X86_PAE
180static void lazy_hcall4(unsigned long call, 179static void lazy_hcall4(unsigned long call,
181 unsigned long arg1, 180 unsigned long arg1,
182 unsigned long arg2, 181 unsigned long arg2,
183 unsigned long arg3, 182 unsigned long arg3,
184 unsigned long arg4) 183 unsigned long arg4)
185{ 184{
186 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 185 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
187 kvm_hypercall4(call, arg1, arg2, arg3, arg4); 186 hcall(call, arg1, arg2, arg3, arg4);
188 else 187 else
189 async_hcall(call, arg1, arg2, arg3, arg4); 188 async_hcall(call, arg1, arg2, arg3, arg4);
190} 189}
@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,
196:*/ 195:*/
197static void lguest_leave_lazy_mmu_mode(void) 196static void lguest_leave_lazy_mmu_mode(void)
198{ 197{
199 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 198 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
200 paravirt_leave_lazy_mmu(); 199 paravirt_leave_lazy_mmu();
201} 200}
202 201
203static void lguest_end_context_switch(struct task_struct *next) 202static void lguest_end_context_switch(struct task_struct *next)
204{ 203{
205 kvm_hypercall0(LHCALL_FLUSH_ASYNC); 204 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
206 paravirt_end_context_switch(next); 205 paravirt_end_context_switch(next);
207} 206}
208 207
@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
286 /* Keep the local copy up to date. */ 285 /* Keep the local copy up to date. */
287 native_write_idt_entry(dt, entrynum, g); 286 native_write_idt_entry(dt, entrynum, g);
288 /* Tell Host about this new entry. */ 287 /* Tell Host about this new entry. */
289 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); 288 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
290} 289}
291 290
292/* 291/*
@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
300 struct desc_struct *idt = (void *)desc->address; 299 struct desc_struct *idt = (void *)desc->address;
301 300
302 for (i = 0; i < (desc->size+1)/8; i++) 301 for (i = 0; i < (desc->size+1)/8; i++)
303 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); 302 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
304} 303}
305 304
306/* 305/*
@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
321 struct desc_struct *gdt = (void *)desc->address; 320 struct desc_struct *gdt = (void *)desc->address;
322 321
323 for (i = 0; i < (desc->size+1)/8; i++) 322 for (i = 0; i < (desc->size+1)/8; i++)
324 kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); 323 hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
325} 324}
326 325
327/* 326/*
@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
334{ 333{
335 native_write_gdt_entry(dt, entrynum, desc, type); 334 native_write_gdt_entry(dt, entrynum, desc, type);
336 /* Tell Host about this new entry. */ 335 /* Tell Host about this new entry. */
337 kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, 336 hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
338 dt[entrynum].a, dt[entrynum].b); 337 dt[entrynum].a, dt[entrynum].b, 0);
339} 338}
340 339
341/* 340/*
@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
931 } 930 }
932 931
933 /* Please wake us this far in the future. */ 932 /* Please wake us this far in the future. */
934 kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); 933 hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
935 return 0; 934 return 0;
936} 935}
937 936
@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
942 case CLOCK_EVT_MODE_UNUSED: 941 case CLOCK_EVT_MODE_UNUSED:
943 case CLOCK_EVT_MODE_SHUTDOWN: 942 case CLOCK_EVT_MODE_SHUTDOWN:
944 /* A 0 argument shuts the clock down. */ 943 /* A 0 argument shuts the clock down. */
945 kvm_hypercall0(LHCALL_SET_CLOCKEVENT); 944 hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
946 break; 945 break;
947 case CLOCK_EVT_MODE_ONESHOT: 946 case CLOCK_EVT_MODE_ONESHOT:
948 /* This is what we expect. */ 947 /* This is what we expect. */
@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)
1100/* STOP! Until an interrupt comes in. */ 1099/* STOP! Until an interrupt comes in. */
1101static void lguest_safe_halt(void) 1100static void lguest_safe_halt(void)
1102{ 1101{
1103 kvm_hypercall0(LHCALL_HALT); 1102 hcall(LHCALL_HALT, 0, 0, 0, 0);
1104} 1103}
1105 1104
1106/* 1105/*
@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)
1112 */ 1111 */
1113static void lguest_power_off(void) 1112static void lguest_power_off(void)
1114{ 1113{
1115 kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), 1114 hcall(LHCALL_SHUTDOWN, __pa("Power down"),
1116 LGUEST_SHUTDOWN_POWEROFF); 1115 LGUEST_SHUTDOWN_POWEROFF, 0, 0);
1117} 1116}
1118 1117
1119/* 1118/*
@@ -1123,7 +1122,7 @@ static void lguest_power_off(void)
1123 */ 1122 */
1124static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) 1123static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
1125{ 1124{
1126 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); 1125 hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
1127 /* The hcall won't return, but to keep gcc happy, we're "done". */ 1126 /* The hcall won't return, but to keep gcc happy, we're "done". */
1128 return NOTIFY_DONE; 1127 return NOTIFY_DONE;
1129} 1128}
@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
1162 len = sizeof(scratch) - 1; 1161 len = sizeof(scratch) - 1;
1163 scratch[len] = '\0'; 1162 scratch[len] = '\0';
1164 memcpy(scratch, buf, len); 1163 memcpy(scratch, buf, len);
1165 kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); 1164 hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
1166 1165
1167 /* This routine returns the number of bytes actually written. */ 1166 /* This routine returns the number of bytes actually written. */
1168 return len; 1167 return len;
@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
1174 */ 1173 */
1175static void lguest_restart(char *reason) 1174static void lguest_restart(char *reason)
1176{ 1175{
1177 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); 1176 hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
1178} 1177}
1179 1178
1180/*G:050 1179/*G:050
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 27eac0faee48..4f420c2f2d55 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -32,7 +32,7 @@ ENTRY(lguest_entry)
32 */ 32 */
33 movl $LHCALL_LGUEST_INIT, %eax 33 movl $LHCALL_LGUEST_INIT, %eax
34 movl $lguest_data - __PAGE_OFFSET, %ebx 34 movl $lguest_data - __PAGE_OFFSET, %ebx
35 .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ 35 int $LGUEST_TRAP_ENTRY
36 36
37 /* Set up the initial stack so we can run C code. */ 37 /* Set up the initial stack so we can run C code. */
38 movl $(init_thread_union+THREAD_SIZE),%esp 38 movl $(init_thread_union+THREAD_SIZE),%esp