diff options
Diffstat (limited to 'arch/x86')
46 files changed, 417 insertions, 289 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4a88cf7695b4..237fc128143d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -21,7 +21,8 @@ config X86 | |||
21 | select HAVE_IDE | 21 | select HAVE_IDE |
22 | select HAVE_OPROFILE | 22 | select HAVE_OPROFILE |
23 | select HAVE_KPROBES | 23 | select HAVE_KPROBES |
24 | select HAVE_KVM | 24 | select HAVE_KRETPROBES |
25 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | ||
25 | 26 | ||
26 | 27 | ||
27 | config GENERIC_LOCKBREAK | 28 | config GENERIC_LOCKBREAK |
@@ -65,9 +66,6 @@ config MMU | |||
65 | config ZONE_DMA | 66 | config ZONE_DMA |
66 | def_bool y | 67 | def_bool y |
67 | 68 | ||
68 | config QUICKLIST | ||
69 | def_bool X86_32 | ||
70 | |||
71 | config SBUS | 69 | config SBUS |
72 | bool | 70 | bool |
73 | 71 | ||
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index e09a6b73a1aa..9304bfba7d45 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -377,6 +377,19 @@ config X86_OOSTORE | |||
377 | def_bool y | 377 | def_bool y |
378 | depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR | 378 | depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR |
379 | 379 | ||
380 | # | ||
381 | # P6_NOPs are a relatively minor optimization that require a family >= | ||
382 | # 6 processor, except that it is broken on certain VIA chips. | ||
383 | # Furthermore, AMD chips prefer a totally different sequence of NOPs | ||
384 | # (which work on all CPUs). As a result, disallow these if we're | ||
385 | # compiling X86_GENERIC but not X86_64 (these NOPs do work on all | ||
386 | # x86-64 capable chips); the list of processors in the right-hand clause | ||
387 | # are the cores that benefit from this optimization. | ||
388 | # | ||
389 | config X86_P6_NOP | ||
390 | def_bool y | ||
391 | depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4) | ||
392 | |||
380 | config X86_TSC | 393 | config X86_TSC |
381 | def_bool y | 394 | def_bool y |
382 | depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 | 395 | depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 |
@@ -390,6 +403,7 @@ config X86_CMOV | |||
390 | config X86_MINIMUM_CPU_FAMILY | 403 | config X86_MINIMUM_CPU_FAMILY |
391 | int | 404 | int |
392 | default "64" if X86_64 | 405 | default "64" if X86_64 |
406 | default "6" if X86_32 && X86_P6_NOP | ||
393 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) | 407 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) |
394 | default "3" | 408 | default "3" |
395 | 409 | ||
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 378353956b5d..e77d89f9e8aa 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -37,6 +37,12 @@ static int detect_memory_e820(void) | |||
37 | "=m" (*desc) | 37 | "=m" (*desc) |
38 | : "D" (desc), "d" (SMAP), "a" (0xe820)); | 38 | : "D" (desc), "d" (SMAP), "a" (0xe820)); |
39 | 39 | ||
40 | /* BIOSes which terminate the chain with CF = 1 as opposed | ||
41 | to %ebx = 0 don't always report the SMAP signature on | ||
42 | the final, failing, probe. */ | ||
43 | if (err) | ||
44 | break; | ||
45 | |||
40 | /* Some BIOSes stop returning SMAP in the middle of | 46 | /* Some BIOSes stop returning SMAP in the middle of |
41 | the search loop. We don't know exactly how the BIOS | 47 | the search loop. We don't know exactly how the BIOS |
42 | screwed up the map at that point, we might have a | 48 | screwed up the map at that point, we might have a |
@@ -47,9 +53,6 @@ static int detect_memory_e820(void) | |||
47 | break; | 53 | break; |
48 | } | 54 | } |
49 | 55 | ||
50 | if (err) | ||
51 | break; | ||
52 | |||
53 | count++; | 56 | count++; |
54 | desc++; | 57 | desc++; |
55 | } while (next && count < E820MAX); | 58 | } while (next && count < E820MAX); |
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h index ff5b73cd406f..468e444622c5 100644 --- a/arch/x86/boot/vesa.h +++ b/arch/x86/boot/vesa.h | |||
@@ -26,17 +26,10 @@ struct vesa_general_info { | |||
26 | far_ptr video_mode_ptr; /* 14 */ | 26 | far_ptr video_mode_ptr; /* 14 */ |
27 | u16 total_memory; /* 18 */ | 27 | u16 total_memory; /* 18 */ |
28 | 28 | ||
29 | u16 oem_software_rev; /* 20 */ | 29 | u8 reserved[236]; /* 20 */ |
30 | far_ptr oem_vendor_name_ptr; /* 22 */ | ||
31 | far_ptr oem_product_name_ptr; /* 26 */ | ||
32 | far_ptr oem_product_rev_ptr; /* 30 */ | ||
33 | |||
34 | u8 reserved[222]; /* 34 */ | ||
35 | u8 oem_data[256]; /* 256 */ | ||
36 | } __attribute__ ((packed)); | 30 | } __attribute__ ((packed)); |
37 | 31 | ||
38 | #define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) | 32 | #define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) |
39 | #define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24)) | ||
40 | 33 | ||
41 | struct vesa_mode_info { | 34 | struct vesa_mode_info { |
42 | u16 mode_attr; /* 0 */ | 35 | u16 mode_attr; /* 0 */ |
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 662dd2f13068..419b5c273374 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c | |||
@@ -37,8 +37,6 @@ static int vesa_probe(void) | |||
37 | 37 | ||
38 | video_vesa.modes = GET_HEAP(struct mode_info, 0); | 38 | video_vesa.modes = GET_HEAP(struct mode_info, 0); |
39 | 39 | ||
40 | vginfo.signature = VBE2_MAGIC; | ||
41 | |||
42 | ax = 0x4f00; | 40 | ax = 0x4f00; |
43 | di = (size_t)&vginfo; | 41 | di = (size_t)&vginfo; |
44 | asm(INT10 | 42 | asm(INT10 |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 1c0503bdfb1a..5e7771a3ba2f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -500,7 +500,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
500 | regs->ss = __USER32_DS; | 500 | regs->ss = __USER32_DS; |
501 | 501 | ||
502 | set_fs(USER_DS); | 502 | set_fs(USER_DS); |
503 | regs->flags &= ~X86_EFLAGS_TF; | 503 | regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); |
504 | if (test_thread_flag(TIF_SINGLESTEP)) | 504 | if (test_thread_flag(TIF_SINGLESTEP)) |
505 | ptrace_notify(SIGTRAP); | 505 | ptrace_notify(SIGTRAP); |
506 | 506 | ||
@@ -600,7 +600,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
600 | regs->ss = __USER32_DS; | 600 | regs->ss = __USER32_DS; |
601 | 601 | ||
602 | set_fs(USER_DS); | 602 | set_fs(USER_DS); |
603 | regs->flags &= ~X86_EFLAGS_TF; | 603 | regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); |
604 | if (test_thread_flag(TIF_SINGLESTEP)) | 604 | if (test_thread_flag(TIF_SINGLESTEP)) |
605 | ptrace_notify(SIGTRAP); | 605 | ptrace_notify(SIGTRAP); |
606 | 606 | ||
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index a33d53017997..8ea040124f7d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -128,13 +128,11 @@ void foo(void) | |||
128 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | 128 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | #ifdef CONFIG_LGUEST_GUEST | 131 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
132 | BLANK(); | 132 | BLANK(); |
133 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 133 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
134 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | 134 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); |
135 | #endif | ||
136 | 135 | ||
137 | #ifdef CONFIG_LGUEST | ||
138 | BLANK(); | 136 | BLANK(); |
139 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); | 137 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); |
140 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); | 138 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f86a3c4a2669..a38aafaefc23 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
504 | 504 | ||
505 | /* Clear all flags overriden by options */ | 505 | /* Clear all flags overriden by options */ |
506 | for (i = 0; i < NCAPINTS; i++) | 506 | for (i = 0; i < NCAPINTS; i++) |
507 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | 507 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
508 | 508 | ||
509 | /* Init Machine Check Exception if available. */ | 509 | /* Init Machine Check Exception if available. */ |
510 | mcheck_init(c); | 510 | mcheck_init(c); |
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index 39f8cb18296c..c2f930d86640 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c | |||
@@ -55,7 +55,6 @@ static int eps_set_state(struct eps_cpu_data *centaur, | |||
55 | { | 55 | { |
56 | struct cpufreq_freqs freqs; | 56 | struct cpufreq_freqs freqs; |
57 | u32 lo, hi; | 57 | u32 lo, hi; |
58 | u8 current_multiplier, current_voltage; | ||
59 | int err = 0; | 58 | int err = 0; |
60 | int i; | 59 | int i; |
61 | 60 | ||
@@ -95,6 +94,10 @@ postchange: | |||
95 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | 94 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); |
96 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); | 95 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); |
97 | 96 | ||
97 | #ifdef DEBUG | ||
98 | { | ||
99 | u8 current_multiplier, current_voltage; | ||
100 | |||
98 | /* Print voltage and multiplier */ | 101 | /* Print voltage and multiplier */ |
99 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | 102 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); |
100 | current_voltage = lo & 0xff; | 103 | current_voltage = lo & 0xff; |
@@ -103,7 +106,8 @@ postchange: | |||
103 | current_multiplier = (lo >> 8) & 0xff; | 106 | current_multiplier = (lo >> 8) & 0xff; |
104 | printk(KERN_INFO "eps: Current multiplier = %d\n", | 107 | printk(KERN_INFO "eps: Current multiplier = %d\n", |
105 | current_multiplier); | 108 | current_multiplier); |
106 | 109 | } | |
110 | #endif | ||
107 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 111 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
108 | return err; | 112 | return err; |
109 | } | 113 | } |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b6e136f23d3d..be83336fddba 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
44 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
45 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
46 | #include <asm/kvm_para.h> | ||
46 | #include "mtrr.h" | 47 | #include "mtrr.h" |
47 | 48 | ||
48 | u32 num_var_ranges = 0; | 49 | u32 num_var_ranges = 0; |
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void) | |||
649 | 650 | ||
650 | /** | 651 | /** |
651 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 652 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
653 | * @end_pfn: ending page frame number | ||
652 | * | 654 | * |
653 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain | 655 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain |
654 | * memory configurations. This routine checks that the highest MTRR matches | 656 | * memory configurations. This routine checks that the highest MTRR matches |
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
688 | 690 | ||
689 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 691 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
690 | if (!highest_pfn) { | 692 | if (!highest_pfn) { |
691 | printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); | 693 | if (!kvm_para_available()) { |
692 | WARN_ON(1); | 694 | printk(KERN_WARNING |
695 | "WARNING: strange, CPU MTRRs all blank?\n"); | ||
696 | WARN_ON(1); | ||
697 | } | ||
693 | return 0; | 698 | return 0; |
694 | } | 699 | } |
695 | 700 | ||
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 200fb3f9ebfb..e8b422c1c512 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
76 | /* All Transmeta CPUs have a constant TSC */ | 76 | /* All Transmeta CPUs have a constant TSC */ |
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | 77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); |
78 | 78 | ||
79 | /* If we can run i686 user-space code, call us an i686 */ | ||
80 | #define USER686 ((1 << X86_FEATURE_TSC)|\ | ||
81 | (1 << X86_FEATURE_CX8)|\ | ||
82 | (1 << X86_FEATURE_CMOV)) | ||
83 | if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) | ||
84 | c->x86 = 6; | ||
85 | |||
86 | #ifdef CONFIG_SYSCTL | 79 | #ifdef CONFIG_SYSCTL |
87 | /* randomize_va_space slows us down enormously; | 80 | /* randomize_va_space slows us down enormously; |
88 | it probably triggers retranslation of x86->native bytecode */ | 81 | it probably triggers retranslation of x86->native bytecode */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2ad9a1bc6a73..c20c9e7e08dd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -453,6 +453,7 @@ ENTRY(stub_execve) | |||
453 | CFI_REGISTER rip, r11 | 453 | CFI_REGISTER rip, r11 |
454 | SAVE_REST | 454 | SAVE_REST |
455 | FIXUP_TOP_OF_STACK %r11 | 455 | FIXUP_TOP_OF_STACK %r11 |
456 | movq %rsp, %rcx | ||
456 | call sys_execve | 457 | call sys_execve |
457 | RESTORE_TOP_OF_STACK %r11 | 458 | RESTORE_TOP_OF_STACK %r11 |
458 | movq %rax,RAX(%rsp) | 459 | movq %rax,RAX(%rsp) |
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip) | |||
1036 | * rdi: name, rsi: argv, rdx: envp | 1037 | * rdi: name, rsi: argv, rdx: envp |
1037 | * | 1038 | * |
1038 | * We want to fallback into: | 1039 | * We want to fallback into: |
1039 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) | 1040 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) |
1040 | * | 1041 | * |
1041 | * do_sys_execve asm fallback arguments: | 1042 | * do_sys_execve asm fallback arguments: |
1042 | * rdi: name, rsi: argv, rdx: envp, fake frame on the stack | 1043 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack |
1043 | */ | 1044 | */ |
1044 | ENTRY(kernel_execve) | 1045 | ENTRY(kernel_execve) |
1045 | CFI_STARTPROC | 1046 | CFI_STARTPROC |
1046 | FAKE_STACK_FRAME $0 | 1047 | FAKE_STACK_FRAME $0 |
1047 | SAVE_ALL | 1048 | SAVE_ALL |
1049 | movq %rsp,%rcx | ||
1048 | call sys_execve | 1050 | call sys_execve |
1049 | movq %rax, RAX(%rsp) | 1051 | movq %rax, RAX(%rsp) |
1050 | RESTORE_REST | 1052 | RESTORE_REST |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 25eb98540a41..fd8ca53943a8 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -606,7 +606,7 @@ ENTRY(_stext) | |||
606 | .section ".bss.page_aligned","wa" | 606 | .section ".bss.page_aligned","wa" |
607 | .align PAGE_SIZE_asm | 607 | .align PAGE_SIZE_asm |
608 | #ifdef CONFIG_X86_PAE | 608 | #ifdef CONFIG_X86_PAE |
609 | ENTRY(swapper_pg_pmd) | 609 | swapper_pg_pmd: |
610 | .fill 1024*KPMDS,4,0 | 610 | .fill 1024*KPMDS,4,0 |
611 | #else | 611 | #else |
612 | ENTRY(swapper_pg_dir) | 612 | ENTRY(swapper_pg_dir) |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index eb415043a929..a007454133a3 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt) | |||
379 | /* Since I easily can, map the first 1G. | 379 | /* Since I easily can, map the first 1G. |
380 | * Don't set NX because code runs from these pages. | 380 | * Don't set NX because code runs from these pages. |
381 | */ | 381 | */ |
382 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 382 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) |
383 | 383 | ||
384 | NEXT_PAGE(level2_kernel_pgt) | 384 | NEXT_PAGE(level2_kernel_pgt) |
385 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. | 385 | /* |
386 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ | 386 | * 128 MB kernel mapping. We spend a full page on this pagetable |
387 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ | 387 | * anyway. |
388 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE) | 388 | * |
389 | /* Module mapping starts here */ | 389 | * The kernel code+data+bss must not be bigger than that. |
390 | .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0 | 390 | * |
391 | * (NOTE: at +128MB starts the module area, see MODULES_VADDR. | ||
392 | * If you want to increase this then increase MODULES_VADDR | ||
393 | * too.) | ||
394 | */ | ||
395 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, | ||
396 | KERNEL_IMAGE_SIZE/PMD_SIZE) | ||
391 | 397 | ||
392 | NEXT_PAGE(level2_spare_pgt) | 398 | NEXT_PAGE(level2_spare_pgt) |
393 | .fill 512,8,0 | 399 | .fill 512, 8, 0 |
394 | 400 | ||
395 | #undef PMDS | 401 | #undef PMDS |
396 | #undef NEXT_PAGE | 402 | #undef NEXT_PAGE |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 429d084e014d..235fd6c77504 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void) | |||
368 | return 0; | 368 | return 0; |
369 | } | 369 | } |
370 | 370 | ||
371 | /* | 371 | /** |
372 | * Try to setup the HPET timer | 372 | * hpet_enable - Try to setup the HPET timer. Returns 1 on success. |
373 | */ | 373 | */ |
374 | int __init hpet_enable(void) | 374 | int __init hpet_enable(void) |
375 | { | 375 | { |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 763dfc407232..d2e39e69aaf8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -132,7 +132,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
132 | if (!cpu_has_fxsr) | 132 | if (!cpu_has_fxsr) |
133 | return -ENODEV; | 133 | return -ENODEV; |
134 | 134 | ||
135 | unlazy_fpu(target); | 135 | init_fpu(target); |
136 | 136 | ||
137 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 137 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
138 | &target->thread.i387.fxsave, 0, -1); | 138 | &target->thread.i387.fxsave, 0, -1); |
@@ -147,7 +147,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
147 | if (!cpu_has_fxsr) | 147 | if (!cpu_has_fxsr) |
148 | return -ENODEV; | 148 | return -ENODEV; |
149 | 149 | ||
150 | unlazy_fpu(target); | 150 | init_fpu(target); |
151 | set_stopped_child_used_math(target); | 151 | set_stopped_child_used_math(target); |
152 | 152 | ||
153 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 153 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
@@ -261,7 +261,7 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env, | |||
261 | } | 261 | } |
262 | #else | 262 | #else |
263 | env->fip = fxsave->fip; | 263 | env->fip = fxsave->fip; |
264 | env->fcs = fxsave->fcs; | 264 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); |
265 | env->foo = fxsave->foo; | 265 | env->foo = fxsave->foo; |
266 | env->fos = fxsave->fos; | 266 | env->fos = fxsave->fos; |
267 | #endif | 267 | #endif |
@@ -307,7 +307,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
307 | if (!HAVE_HWFP) | 307 | if (!HAVE_HWFP) |
308 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | 308 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); |
309 | 309 | ||
310 | unlazy_fpu(target); | 310 | init_fpu(target); |
311 | 311 | ||
312 | if (!cpu_has_fxsr) | 312 | if (!cpu_has_fxsr) |
313 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 313 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
@@ -332,7 +332,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
332 | if (!HAVE_HWFP) | 332 | if (!HAVE_HWFP) |
333 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 333 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
334 | 334 | ||
335 | unlazy_fpu(target); | 335 | init_fpu(target); |
336 | set_stopped_child_used_math(target); | 336 | set_stopped_child_used_math(target); |
337 | 337 | ||
338 | if (!cpu_has_fxsr) | 338 | if (!cpu_has_fxsr) |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 5b3ce7934363..3d01e47777db 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -15,6 +15,7 @@ static struct files_struct init_files = INIT_FILES; | |||
15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
17 | struct mm_struct init_mm = INIT_MM(init_mm); | 17 | struct mm_struct init_mm = INIT_MM(init_mm); |
18 | EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | ||
18 | 19 | ||
19 | /* | 20 | /* |
20 | * Initial thread structure. | 21 | * Initial thread structure. |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a7d50a547dc2..be3c7a299f02 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -603,11 +603,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
603 | } | 603 | } |
604 | #endif | 604 | #endif |
605 | 605 | ||
606 | #ifdef X86_BTS | ||
606 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 607 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
607 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 608 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
608 | 609 | ||
609 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 610 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
610 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 611 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
612 | #endif | ||
611 | 613 | ||
612 | 614 | ||
613 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 615 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b0cc8f0136d8..3baf9b9f4c87 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -604,11 +604,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
604 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 604 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
605 | } | 605 | } |
606 | 606 | ||
607 | #ifdef X86_BTS | ||
607 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 608 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
608 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 609 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
609 | 610 | ||
610 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 611 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
611 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 612 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
613 | #endif | ||
612 | } | 614 | } |
613 | 615 | ||
614 | /* | 616 | /* |
@@ -730,16 +732,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
730 | */ | 732 | */ |
731 | asmlinkage | 733 | asmlinkage |
732 | long sys_execve(char __user *name, char __user * __user *argv, | 734 | long sys_execve(char __user *name, char __user * __user *argv, |
733 | char __user * __user *envp, struct pt_regs regs) | 735 | char __user * __user *envp, struct pt_regs *regs) |
734 | { | 736 | { |
735 | long error; | 737 | long error; |
736 | char * filename; | 738 | char * filename; |
737 | 739 | ||
738 | filename = getname(name); | 740 | filename = getname(name); |
739 | error = PTR_ERR(filename); | 741 | error = PTR_ERR(filename); |
740 | if (IS_ERR(filename)) | 742 | if (IS_ERR(filename)) |
741 | return error; | 743 | return error; |
742 | error = do_execve(filename, argv, envp, ®s); | 744 | error = do_execve(filename, argv, envp, regs); |
743 | putname(filename); | 745 | putname(filename); |
744 | return error; | 746 | return error; |
745 | } | 747 | } |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d862e396b099..d5904eef1d31 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -323,6 +323,16 @@ static int putreg(struct task_struct *child, | |||
323 | return set_flags(child, value); | 323 | return set_flags(child, value); |
324 | 324 | ||
325 | #ifdef CONFIG_X86_64 | 325 | #ifdef CONFIG_X86_64 |
326 | /* | ||
327 | * Orig_ax is really just a flag with small positive and | ||
328 | * negative values, so make sure to always sign-extend it | ||
329 | * from 32 bits so that it works correctly regardless of | ||
330 | * whether we come from a 32-bit environment or not. | ||
331 | */ | ||
332 | case offsetof(struct user_regs_struct, orig_ax): | ||
333 | value = (long) (s32) value; | ||
334 | break; | ||
335 | |||
326 | case offsetof(struct user_regs_struct,fs_base): | 336 | case offsetof(struct user_regs_struct,fs_base): |
327 | if (value >= TASK_SIZE_OF(child)) | 337 | if (value >= TASK_SIZE_OF(child)) |
328 | return -EIO; | 338 | return -EIO; |
@@ -544,6 +554,8 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
544 | return 0; | 554 | return 0; |
545 | } | 555 | } |
546 | 556 | ||
557 | #ifdef X86_BTS | ||
558 | |||
547 | static int ptrace_bts_get_size(struct task_struct *child) | 559 | static int ptrace_bts_get_size(struct task_struct *child) |
548 | { | 560 | { |
549 | if (!child->thread.ds_area_msr) | 561 | if (!child->thread.ds_area_msr) |
@@ -826,6 +838,7 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, | |||
826 | 838 | ||
827 | ptrace_bts_write_record(tsk, &rec); | 839 | ptrace_bts_write_record(tsk, &rec); |
828 | } | 840 | } |
841 | #endif /* X86_BTS */ | ||
829 | 842 | ||
830 | /* | 843 | /* |
831 | * Called by kernel/ptrace.c when detaching.. | 844 | * Called by kernel/ptrace.c when detaching.. |
@@ -839,7 +852,9 @@ void ptrace_disable(struct task_struct *child) | |||
839 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 852 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
840 | #endif | 853 | #endif |
841 | if (child->thread.ds_area_msr) { | 854 | if (child->thread.ds_area_msr) { |
855 | #ifdef X86_BTS | ||
842 | ptrace_bts_realloc(child, 0, 0); | 856 | ptrace_bts_realloc(child, 0, 0); |
857 | #endif | ||
843 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 858 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); |
844 | if (!child->thread.debugctlmsr) | 859 | if (!child->thread.debugctlmsr) |
845 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 860 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
@@ -961,6 +976,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
961 | break; | 976 | break; |
962 | #endif | 977 | #endif |
963 | 978 | ||
979 | /* | ||
980 | * These bits need more cooking - not enabled yet: | ||
981 | */ | ||
982 | #ifdef X86_BTS | ||
964 | case PTRACE_BTS_CONFIG: | 983 | case PTRACE_BTS_CONFIG: |
965 | ret = ptrace_bts_config | 984 | ret = ptrace_bts_config |
966 | (child, data, (struct ptrace_bts_config __user *)addr); | 985 | (child, data, (struct ptrace_bts_config __user *)addr); |
@@ -988,6 +1007,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
988 | ret = ptrace_bts_drain | 1007 | ret = ptrace_bts_drain |
989 | (child, data, (struct bts_struct __user *) addr); | 1008 | (child, data, (struct bts_struct __user *) addr); |
990 | break; | 1009 | break; |
1010 | #endif | ||
991 | 1011 | ||
992 | default: | 1012 | default: |
993 | ret = ptrace_request(child, request, addr, data); | 1013 | ret = ptrace_request(child, request, addr, data); |
@@ -1035,10 +1055,17 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | |||
1035 | R32(esi, si); | 1055 | R32(esi, si); |
1036 | R32(ebp, bp); | 1056 | R32(ebp, bp); |
1037 | R32(eax, ax); | 1057 | R32(eax, ax); |
1038 | R32(orig_eax, orig_ax); | ||
1039 | R32(eip, ip); | 1058 | R32(eip, ip); |
1040 | R32(esp, sp); | 1059 | R32(esp, sp); |
1041 | 1060 | ||
1061 | case offsetof(struct user32, regs.orig_eax): | ||
1062 | /* | ||
1063 | * Sign-extend the value so that orig_eax = -1 | ||
1064 | * causes (long)orig_ax < 0 tests to fire correctly. | ||
1065 | */ | ||
1066 | regs->orig_ax = (long) (s32) value; | ||
1067 | break; | ||
1068 | |||
1042 | case offsetof(struct user32, regs.eflags): | 1069 | case offsetof(struct user32, regs.eflags): |
1043 | return set_flags(child, value); | 1070 | return set_flags(child, value); |
1044 | 1071 | ||
@@ -1226,12 +1253,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | |||
1226 | case PTRACE_SETOPTIONS: | 1253 | case PTRACE_SETOPTIONS: |
1227 | case PTRACE_SET_THREAD_AREA: | 1254 | case PTRACE_SET_THREAD_AREA: |
1228 | case PTRACE_GET_THREAD_AREA: | 1255 | case PTRACE_GET_THREAD_AREA: |
1256 | #ifdef X86_BTS | ||
1229 | case PTRACE_BTS_CONFIG: | 1257 | case PTRACE_BTS_CONFIG: |
1230 | case PTRACE_BTS_STATUS: | 1258 | case PTRACE_BTS_STATUS: |
1231 | case PTRACE_BTS_SIZE: | 1259 | case PTRACE_BTS_SIZE: |
1232 | case PTRACE_BTS_GET: | 1260 | case PTRACE_BTS_GET: |
1233 | case PTRACE_BTS_CLEAR: | 1261 | case PTRACE_BTS_CLEAR: |
1234 | case PTRACE_BTS_DRAIN: | 1262 | case PTRACE_BTS_DRAIN: |
1263 | #endif | ||
1235 | return sys_ptrace(request, pid, addr, data); | 1264 | return sys_ptrace(request, pid, addr, data); |
1236 | 1265 | ||
1237 | default: | 1266 | default: |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 7fd6ac43e4a1..55ceb8cdef75 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -326,6 +326,10 @@ static inline void kb_wait(void) | |||
326 | } | 326 | } |
327 | } | 327 | } |
328 | 328 | ||
329 | void __attribute__((weak)) mach_reboot_fixups(void) | ||
330 | { | ||
331 | } | ||
332 | |||
329 | static void native_machine_emergency_restart(void) | 333 | static void native_machine_emergency_restart(void) |
330 | { | 334 | { |
331 | int i; | 335 | int i; |
@@ -337,6 +341,8 @@ static void native_machine_emergency_restart(void) | |||
337 | /* Could also try the reset bit in the Hammer NB */ | 341 | /* Could also try the reset bit in the Hammer NB */ |
338 | switch (reboot_type) { | 342 | switch (reboot_type) { |
339 | case BOOT_KBD: | 343 | case BOOT_KBD: |
344 | mach_reboot_fixups(); /* for board specific fixups */ | ||
345 | |||
340 | for (i = 0; i < 10; i++) { | 346 | for (i = 0; i < 10; i++) { |
341 | kb_wait(); | 347 | kb_wait(); |
342 | udelay(50); | 348 | udelay(50); |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6fd804f07821..7637dc91c79b 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
1021 | 1021 | ||
1022 | /* Clear all flags overriden by options */ | 1022 | /* Clear all flags overriden by options */ |
1023 | for (i = 0; i < NCAPINTS; i++) | 1023 | for (i = 0; i < NCAPINTS; i++) |
1024 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | 1024 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
1025 | 1025 | ||
1026 | #ifdef CONFIG_X86_MCE | 1026 | #ifdef CONFIG_X86_MCE |
1027 | mcheck_init(c); | 1027 | mcheck_init(c); |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index caee1f002fed..0157a6f0f41f 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
@@ -407,7 +407,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
407 | * The tracer may want to single-step inside the | 407 | * The tracer may want to single-step inside the |
408 | * handler too. | 408 | * handler too. |
409 | */ | 409 | */ |
410 | regs->flags &= ~TF_MASK; | 410 | regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); |
411 | if (test_thread_flag(TIF_SINGLESTEP)) | 411 | if (test_thread_flag(TIF_SINGLESTEP)) |
412 | ptrace_notify(SIGTRAP); | 412 | ptrace_notify(SIGTRAP); |
413 | 413 | ||
@@ -500,7 +500,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
500 | * The tracer may want to single-step inside the | 500 | * The tracer may want to single-step inside the |
501 | * handler too. | 501 | * handler too. |
502 | */ | 502 | */ |
503 | regs->flags &= ~TF_MASK; | 503 | regs->flags &= ~(TF_MASK | X86_EFLAGS_DF); |
504 | if (test_thread_flag(TIF_SINGLESTEP)) | 504 | if (test_thread_flag(TIF_SINGLESTEP)) |
505 | ptrace_notify(SIGTRAP); | 505 | ptrace_notify(SIGTRAP); |
506 | 506 | ||
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 7347bb14e306..1c83e5124c65 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
@@ -295,7 +295,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
295 | see include/asm-x86_64/uaccess.h for details. */ | 295 | see include/asm-x86_64/uaccess.h for details. */ |
296 | set_fs(USER_DS); | 296 | set_fs(USER_DS); |
297 | 297 | ||
298 | regs->flags &= ~X86_EFLAGS_TF; | 298 | regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); |
299 | if (test_thread_flag(TIF_SINGLESTEP)) | 299 | if (test_thread_flag(TIF_SINGLESTEP)) |
300 | ptrace_notify(SIGTRAP); | 300 | ptrace_notify(SIGTRAP); |
301 | #ifdef DEBUG_SIG | 301 | #ifdef DEBUG_SIG |
@@ -311,6 +311,35 @@ give_sigsegv: | |||
311 | } | 311 | } |
312 | 312 | ||
313 | /* | 313 | /* |
314 | * Return -1L or the syscall number that @regs is executing. | ||
315 | */ | ||
316 | static long current_syscall(struct pt_regs *regs) | ||
317 | { | ||
318 | /* | ||
319 | * We always sign-extend a -1 value being set here, | ||
320 | * so this is always either -1L or a syscall number. | ||
321 | */ | ||
322 | return regs->orig_ax; | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Return a value that is -EFOO if the system call in @regs->orig_ax | ||
327 | * returned an error. This only works for @regs from @current. | ||
328 | */ | ||
329 | static long current_syscall_ret(struct pt_regs *regs) | ||
330 | { | ||
331 | #ifdef CONFIG_IA32_EMULATION | ||
332 | if (test_thread_flag(TIF_IA32)) | ||
333 | /* | ||
334 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | ||
335 | * and will match correctly in comparisons. | ||
336 | */ | ||
337 | return (int) regs->ax; | ||
338 | #endif | ||
339 | return regs->ax; | ||
340 | } | ||
341 | |||
342 | /* | ||
314 | * OK, we're invoking a handler | 343 | * OK, we're invoking a handler |
315 | */ | 344 | */ |
316 | 345 | ||
@@ -327,9 +356,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
327 | #endif | 356 | #endif |
328 | 357 | ||
329 | /* Are we from a system call? */ | 358 | /* Are we from a system call? */ |
330 | if ((long)regs->orig_ax >= 0) { | 359 | if (current_syscall(regs) >= 0) { |
331 | /* If so, check system call restarting.. */ | 360 | /* If so, check system call restarting.. */ |
332 | switch (regs->ax) { | 361 | switch (current_syscall_ret(regs)) { |
333 | case -ERESTART_RESTARTBLOCK: | 362 | case -ERESTART_RESTARTBLOCK: |
334 | case -ERESTARTNOHAND: | 363 | case -ERESTARTNOHAND: |
335 | regs->ax = -EINTR; | 364 | regs->ax = -EINTR; |
@@ -426,10 +455,9 @@ static void do_signal(struct pt_regs *regs) | |||
426 | } | 455 | } |
427 | 456 | ||
428 | /* Did we come from a system call? */ | 457 | /* Did we come from a system call? */ |
429 | if ((long)regs->orig_ax >= 0) { | 458 | if (current_syscall(regs) >= 0) { |
430 | /* Restart the system call - no handlers present */ | 459 | /* Restart the system call - no handlers present */ |
431 | long res = regs->ax; | 460 | switch (current_syscall_ret(regs)) { |
432 | switch (res) { | ||
433 | case -ERESTARTNOHAND: | 461 | case -ERESTARTNOHAND: |
434 | case -ERESTARTSYS: | 462 | case -ERESTARTSYS: |
435 | case -ERESTARTNOINTR: | 463 | case -ERESTARTNOINTR: |
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index d53bd6fcb428..0880f2c388a9 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c | |||
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
554 | int timeout; | 554 | int timeout; |
555 | unsigned long start_rip; | 555 | unsigned long start_rip; |
556 | struct create_idle c_idle = { | 556 | struct create_idle c_idle = { |
557 | .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), | ||
558 | .cpu = cpu, | 557 | .cpu = cpu, |
559 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 558 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
560 | }; | 559 | }; |
560 | INIT_WORK(&c_idle.work, do_fork_idle); | ||
561 | 561 | ||
562 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | 562 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ |
563 | if (!cpu_gdt_descr[cpu].address && | 563 | if (!cpu_gdt_descr[cpu].address && |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 02f0f61f5b11..c28c342c162f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name) | |||
25 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 25 | static void save_stack_address(void *data, unsigned long addr, int reliable) |
26 | { | 26 | { |
27 | struct stack_trace *trace = data; | 27 | struct stack_trace *trace = data; |
28 | if (!reliable) | ||
29 | return; | ||
28 | if (trace->skip > 0) { | 30 | if (trace->skip > 0) { |
29 | trace->skip--; | 31 | trace->skip--; |
30 | return; | 32 | return; |
@@ -37,6 +39,8 @@ static void | |||
37 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 39 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) |
38 | { | 40 | { |
39 | struct stack_trace *trace = (struct stack_trace *)data; | 41 | struct stack_trace *trace = (struct stack_trace *)data; |
42 | if (!reliable) | ||
43 | return; | ||
40 | if (in_sched_functions(addr)) | 44 | if (in_sched_functions(addr)) |
41 | return; | 45 | return; |
42 | if (trace->skip > 0) { | 46 | if (trace->skip > 0) { |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 2ef1a5f8d675..9d406cdc847f 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block) | |||
166 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 166 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); |
167 | } else { | 167 | } else { |
168 | write_debugctlmsr(child, | 168 | write_debugctlmsr(child, |
169 | child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); | 169 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); |
170 | 170 | ||
171 | if (!child->thread.debugctlmsr) | 171 | if (!child->thread.debugctlmsr) |
172 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 172 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
@@ -189,7 +189,7 @@ void user_disable_single_step(struct task_struct *child) | |||
189 | * Make sure block stepping (BTF) is disabled. | 189 | * Make sure block stepping (BTF) is disabled. |
190 | */ | 190 | */ |
191 | write_debugctlmsr(child, | 191 | write_debugctlmsr(child, |
192 | child->thread.debugctlmsr & ~TIF_DEBUGCTLMSR); | 192 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); |
193 | 193 | ||
194 | if (!child->thread.debugctlmsr) | 194 | if (!child->thread.debugctlmsr) |
195 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 195 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 6dfd4e76661a..022bcaa3b42e 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c | |||
@@ -91,7 +91,9 @@ int do_set_thread_area(struct task_struct *p, int idx, | |||
91 | 91 | ||
92 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | 92 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) |
93 | { | 93 | { |
94 | return do_set_thread_area(current, -1, u_info, 1); | 94 | int ret = do_set_thread_area(current, -1, u_info, 1); |
95 | prevent_tail_call(ret); | ||
96 | return ret; | ||
95 | } | 97 | } |
96 | 98 | ||
97 | 99 | ||
@@ -139,7 +141,9 @@ int do_get_thread_area(struct task_struct *p, int idx, | |||
139 | 141 | ||
140 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | 142 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) |
141 | { | 143 | { |
142 | return do_get_thread_area(current, -1, u_info); | 144 | int ret = do_get_thread_area(current, -1, u_info); |
145 | prevent_tail_call(ret); | ||
146 | return ret; | ||
143 | } | 147 | } |
144 | 148 | ||
145 | int regset_tls_active(struct task_struct *target, | 149 | int regset_tls_active(struct task_struct *target, |
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 43517e324be8..f14cfd9d1f94 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz); | |||
28 | static int __init tsc_setup(char *str) | 28 | static int __init tsc_setup(char *str) |
29 | { | 29 | { |
30 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | 30 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " |
31 | "cannot disable TSC.\n"); | 31 | "cannot disable TSC completely.\n"); |
32 | mark_tsc_unstable("user disabled TSC"); | ||
32 | return 1; | 33 | return 1; |
33 | } | 34 | } |
34 | #else | 35 | #else |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3f8242774580..edff4c985485 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -44,11 +44,6 @@ | |||
44 | 44 | ||
45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
46 | #define __syscall_clobber "r11","cx","memory" | 46 | #define __syscall_clobber "r11","cx","memory" |
47 | #define __pa_vsymbol(x) \ | ||
48 | ({unsigned long v; \ | ||
49 | extern char __vsyscall_0; \ | ||
50 | asm("" : "=r" (v) : "0" (x)); \ | ||
51 | ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) | ||
52 | 47 | ||
53 | /* | 48 | /* |
54 | * vsyscall_gtod_data contains data that is : | 49 | * vsyscall_gtod_data contains data that is : |
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz) | |||
102 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 97 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
103 | { | 98 | { |
104 | int ret; | 99 | int ret; |
105 | asm volatile("vsysc2: syscall" | 100 | asm volatile("syscall" |
106 | : "=a" (ret) | 101 | : "=a" (ret) |
107 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | 102 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) |
108 | : __syscall_clobber ); | 103 | : __syscall_clobber ); |
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |||
112 | static __always_inline long time_syscall(long *t) | 107 | static __always_inline long time_syscall(long *t) |
113 | { | 108 | { |
114 | long secs; | 109 | long secs; |
115 | asm volatile("vsysc1: syscall" | 110 | asm volatile("syscall" |
116 | : "=a" (secs) | 111 | : "=a" (secs) |
117 | : "0" (__NR_time),"D" (t) : __syscall_clobber); | 112 | : "0" (__NR_time),"D" (t) : __syscall_clobber); |
118 | return secs; | 113 | return secs; |
@@ -228,42 +223,11 @@ long __vsyscall(3) venosys_1(void) | |||
228 | 223 | ||
229 | #ifdef CONFIG_SYSCTL | 224 | #ifdef CONFIG_SYSCTL |
230 | 225 | ||
231 | #define SYSCALL 0x050f | 226 | static int |
232 | #define NOP2 0x9090 | 227 | vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, |
233 | 228 | void __user *buffer, size_t *lenp, loff_t *ppos) | |
234 | /* | ||
235 | * NOP out syscall in vsyscall page when not needed. | ||
236 | */ | ||
237 | static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | ||
238 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
239 | { | 229 | { |
240 | extern u16 vsysc1, vsysc2; | 230 | return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); |
241 | u16 __iomem *map1; | ||
242 | u16 __iomem *map2; | ||
243 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | ||
244 | if (!write) | ||
245 | return ret; | ||
246 | /* gcc has some trouble with __va(__pa()), so just do it this | ||
247 | way. */ | ||
248 | map1 = ioremap(__pa_vsymbol(&vsysc1), 2); | ||
249 | if (!map1) | ||
250 | return -ENOMEM; | ||
251 | map2 = ioremap(__pa_vsymbol(&vsysc2), 2); | ||
252 | if (!map2) { | ||
253 | ret = -ENOMEM; | ||
254 | goto out; | ||
255 | } | ||
256 | if (!vsyscall_gtod_data.sysctl_enabled) { | ||
257 | writew(SYSCALL, map1); | ||
258 | writew(SYSCALL, map2); | ||
259 | } else { | ||
260 | writew(NOP2, map1); | ||
261 | writew(NOP2, map2); | ||
262 | } | ||
263 | iounmap(map2); | ||
264 | out: | ||
265 | iounmap(map1); | ||
266 | return ret; | ||
267 | } | 231 | } |
268 | 232 | ||
269 | static ctl_table kernel_table2[] = { | 233 | static ctl_table kernel_table2[] = { |
@@ -279,7 +243,6 @@ static ctl_table kernel_root_table2[] = { | |||
279 | .child = kernel_table2 }, | 243 | .child = kernel_table2 }, |
280 | {} | 244 | {} |
281 | }; | 245 | }; |
282 | |||
283 | #endif | 246 | #endif |
284 | 247 | ||
285 | /* Assume __initcall executes before all user space. Hopefully kmod | 248 | /* Assume __initcall executes before all user space. Hopefully kmod |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2cbee9479ce4..68a6b1511934 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -647,6 +647,10 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
647 | apic->timer.period = apic_get_reg(apic, APIC_TMICT) * | 647 | apic->timer.period = apic_get_reg(apic, APIC_TMICT) * |
648 | APIC_BUS_CYCLE_NS * apic->timer.divide_count; | 648 | APIC_BUS_CYCLE_NS * apic->timer.divide_count; |
649 | atomic_set(&apic->timer.pending, 0); | 649 | atomic_set(&apic->timer.pending, 0); |
650 | |||
651 | if (!apic->timer.period) | ||
652 | return; | ||
653 | |||
650 | hrtimer_start(&apic->timer.dev, | 654 | hrtimer_start(&apic->timer.dev, |
651 | ktime_add_ns(now, apic->timer.period), | 655 | ktime_add_ns(now, apic->timer.period), |
652 | HRTIMER_MODE_ABS); | 656 | HRTIMER_MODE_ABS); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8efdcdbebb03..d8172aabc660 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -681,8 +681,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
681 | unsigned level, | 681 | unsigned level, |
682 | int metaphysical, | 682 | int metaphysical, |
683 | unsigned access, | 683 | unsigned access, |
684 | u64 *parent_pte, | 684 | u64 *parent_pte) |
685 | bool *new_page) | ||
686 | { | 685 | { |
687 | union kvm_mmu_page_role role; | 686 | union kvm_mmu_page_role role; |
688 | unsigned index; | 687 | unsigned index; |
@@ -722,8 +721,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
722 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 721 | vcpu->arch.mmu.prefetch_page(vcpu, sp); |
723 | if (!metaphysical) | 722 | if (!metaphysical) |
724 | rmap_write_protect(vcpu->kvm, gfn); | 723 | rmap_write_protect(vcpu->kvm, gfn); |
725 | if (new_page) | ||
726 | *new_page = 1; | ||
727 | return sp; | 724 | return sp; |
728 | } | 725 | } |
729 | 726 | ||
@@ -876,11 +873,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
876 | 873 | ||
877 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | 874 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) |
878 | { | 875 | { |
876 | struct page *page; | ||
877 | |||
879 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 878 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); |
880 | 879 | ||
881 | if (gpa == UNMAPPED_GVA) | 880 | if (gpa == UNMAPPED_GVA) |
882 | return NULL; | 881 | return NULL; |
883 | return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 882 | |
883 | down_read(¤t->mm->mmap_sem); | ||
884 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
885 | up_read(¤t->mm->mmap_sem); | ||
886 | |||
887 | return page; | ||
884 | } | 888 | } |
885 | 889 | ||
886 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 890 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
@@ -999,8 +1003,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, | |||
999 | >> PAGE_SHIFT; | 1003 | >> PAGE_SHIFT; |
1000 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | 1004 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, |
1001 | v, level - 1, | 1005 | v, level - 1, |
1002 | 1, ACC_ALL, &table[index], | 1006 | 1, ACC_ALL, &table[index]); |
1003 | NULL); | ||
1004 | if (!new_table) { | 1007 | if (!new_table) { |
1005 | pgprintk("nonpaging_map: ENOMEM\n"); | 1008 | pgprintk("nonpaging_map: ENOMEM\n"); |
1006 | kvm_release_page_clean(page); | 1009 | kvm_release_page_clean(page); |
@@ -1020,15 +1023,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1020 | 1023 | ||
1021 | struct page *page; | 1024 | struct page *page; |
1022 | 1025 | ||
1026 | down_read(&vcpu->kvm->slots_lock); | ||
1027 | |||
1023 | down_read(¤t->mm->mmap_sem); | 1028 | down_read(¤t->mm->mmap_sem); |
1024 | page = gfn_to_page(vcpu->kvm, gfn); | 1029 | page = gfn_to_page(vcpu->kvm, gfn); |
1030 | up_read(¤t->mm->mmap_sem); | ||
1025 | 1031 | ||
1026 | spin_lock(&vcpu->kvm->mmu_lock); | 1032 | spin_lock(&vcpu->kvm->mmu_lock); |
1027 | kvm_mmu_free_some_pages(vcpu); | 1033 | kvm_mmu_free_some_pages(vcpu); |
1028 | r = __nonpaging_map(vcpu, v, write, gfn, page); | 1034 | r = __nonpaging_map(vcpu, v, write, gfn, page); |
1029 | spin_unlock(&vcpu->kvm->mmu_lock); | 1035 | spin_unlock(&vcpu->kvm->mmu_lock); |
1030 | 1036 | ||
1031 | up_read(¤t->mm->mmap_sem); | 1037 | up_read(&vcpu->kvm->slots_lock); |
1032 | 1038 | ||
1033 | return r; | 1039 | return r; |
1034 | } | 1040 | } |
@@ -1090,7 +1096,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1090 | 1096 | ||
1091 | ASSERT(!VALID_PAGE(root)); | 1097 | ASSERT(!VALID_PAGE(root)); |
1092 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 1098 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
1093 | PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL); | 1099 | PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); |
1094 | root = __pa(sp->spt); | 1100 | root = __pa(sp->spt); |
1095 | ++sp->root_count; | 1101 | ++sp->root_count; |
1096 | vcpu->arch.mmu.root_hpa = root; | 1102 | vcpu->arch.mmu.root_hpa = root; |
@@ -1111,7 +1117,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1111 | root_gfn = 0; | 1117 | root_gfn = 0; |
1112 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 1118 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
1113 | PT32_ROOT_LEVEL, !is_paging(vcpu), | 1119 | PT32_ROOT_LEVEL, !is_paging(vcpu), |
1114 | ACC_ALL, NULL, NULL); | 1120 | ACC_ALL, NULL); |
1115 | root = __pa(sp->spt); | 1121 | root = __pa(sp->spt); |
1116 | ++sp->root_count; | 1122 | ++sp->root_count; |
1117 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 1123 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
@@ -1172,7 +1178,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | |||
1172 | 1178 | ||
1173 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 1179 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
1174 | { | 1180 | { |
1175 | pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); | 1181 | pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3); |
1176 | mmu_free_roots(vcpu); | 1182 | mmu_free_roots(vcpu); |
1177 | } | 1183 | } |
1178 | 1184 | ||
@@ -1362,6 +1368,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1362 | gfn_t gfn; | 1368 | gfn_t gfn; |
1363 | int r; | 1369 | int r; |
1364 | u64 gpte = 0; | 1370 | u64 gpte = 0; |
1371 | struct page *page; | ||
1365 | 1372 | ||
1366 | if (bytes != 4 && bytes != 8) | 1373 | if (bytes != 4 && bytes != 8) |
1367 | return; | 1374 | return; |
@@ -1389,6 +1396,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1389 | if (!is_present_pte(gpte)) | 1396 | if (!is_present_pte(gpte)) |
1390 | return; | 1397 | return; |
1391 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 1398 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
1399 | |||
1400 | down_read(¤t->mm->mmap_sem); | ||
1401 | page = gfn_to_page(vcpu->kvm, gfn); | ||
1402 | up_read(¤t->mm->mmap_sem); | ||
1403 | |||
1392 | vcpu->arch.update_pte.gfn = gfn; | 1404 | vcpu->arch.update_pte.gfn = gfn; |
1393 | vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn); | 1405 | vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn); |
1394 | } | 1406 | } |
@@ -1496,9 +1508,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1496 | gpa_t gpa; | 1508 | gpa_t gpa; |
1497 | int r; | 1509 | int r; |
1498 | 1510 | ||
1499 | down_read(¤t->mm->mmap_sem); | 1511 | down_read(&vcpu->kvm->slots_lock); |
1500 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1512 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); |
1501 | up_read(¤t->mm->mmap_sem); | 1513 | up_read(&vcpu->kvm->slots_lock); |
1502 | 1514 | ||
1503 | spin_lock(&vcpu->kvm->mmu_lock); | 1515 | spin_lock(&vcpu->kvm->mmu_lock); |
1504 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1516 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 03ba8608fe0f..ecc0856268c4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, | |||
91 | pt_element_t *table; | 91 | pt_element_t *table; |
92 | struct page *page; | 92 | struct page *page; |
93 | 93 | ||
94 | down_read(¤t->mm->mmap_sem); | ||
94 | page = gfn_to_page(kvm, table_gfn); | 95 | page = gfn_to_page(kvm, table_gfn); |
96 | up_read(¤t->mm->mmap_sem); | ||
97 | |||
95 | table = kmap_atomic(page, KM_USER0); | 98 | table = kmap_atomic(page, KM_USER0); |
96 | 99 | ||
97 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 100 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
@@ -140,7 +143,7 @@ walk: | |||
140 | } | 143 | } |
141 | #endif | 144 | #endif |
142 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || | 145 | ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || |
143 | (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); | 146 | (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0); |
144 | 147 | ||
145 | pt_access = ACC_ALL; | 148 | pt_access = ACC_ALL; |
146 | 149 | ||
@@ -297,7 +300,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
297 | u64 shadow_pte; | 300 | u64 shadow_pte; |
298 | int metaphysical; | 301 | int metaphysical; |
299 | gfn_t table_gfn; | 302 | gfn_t table_gfn; |
300 | bool new_page = 0; | ||
301 | 303 | ||
302 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | 304 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; |
303 | if (level == PT_PAGE_TABLE_LEVEL) | 305 | if (level == PT_PAGE_TABLE_LEVEL) |
@@ -319,8 +321,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
319 | } | 321 | } |
320 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 322 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, |
321 | metaphysical, access, | 323 | metaphysical, access, |
322 | shadow_ent, &new_page); | 324 | shadow_ent); |
323 | if (new_page && !metaphysical) { | 325 | if (!metaphysical) { |
324 | int r; | 326 | int r; |
325 | pt_element_t curr_pte; | 327 | pt_element_t curr_pte; |
326 | r = kvm_read_guest_atomic(vcpu->kvm, | 328 | r = kvm_read_guest_atomic(vcpu->kvm, |
@@ -378,7 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
378 | if (r) | 380 | if (r) |
379 | return r; | 381 | return r; |
380 | 382 | ||
381 | down_read(¤t->mm->mmap_sem); | 383 | down_read(&vcpu->kvm->slots_lock); |
382 | /* | 384 | /* |
383 | * Look up the shadow pte for the faulting address. | 385 | * Look up the shadow pte for the faulting address. |
384 | */ | 386 | */ |
@@ -392,11 +394,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
392 | pgprintk("%s: guest page fault\n", __FUNCTION__); | 394 | pgprintk("%s: guest page fault\n", __FUNCTION__); |
393 | inject_page_fault(vcpu, addr, walker.error_code); | 395 | inject_page_fault(vcpu, addr, walker.error_code); |
394 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ | 396 | vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ |
395 | up_read(¤t->mm->mmap_sem); | 397 | up_read(&vcpu->kvm->slots_lock); |
396 | return 0; | 398 | return 0; |
397 | } | 399 | } |
398 | 400 | ||
401 | down_read(¤t->mm->mmap_sem); | ||
399 | page = gfn_to_page(vcpu->kvm, walker.gfn); | 402 | page = gfn_to_page(vcpu->kvm, walker.gfn); |
403 | up_read(¤t->mm->mmap_sem); | ||
400 | 404 | ||
401 | spin_lock(&vcpu->kvm->mmu_lock); | 405 | spin_lock(&vcpu->kvm->mmu_lock); |
402 | kvm_mmu_free_some_pages(vcpu); | 406 | kvm_mmu_free_some_pages(vcpu); |
@@ -413,14 +417,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
413 | */ | 417 | */ |
414 | if (shadow_pte && is_io_pte(*shadow_pte)) { | 418 | if (shadow_pte && is_io_pte(*shadow_pte)) { |
415 | spin_unlock(&vcpu->kvm->mmu_lock); | 419 | spin_unlock(&vcpu->kvm->mmu_lock); |
416 | up_read(¤t->mm->mmap_sem); | 420 | up_read(&vcpu->kvm->slots_lock); |
417 | return 1; | 421 | return 1; |
418 | } | 422 | } |
419 | 423 | ||
420 | ++vcpu->stat.pf_fixed; | 424 | ++vcpu->stat.pf_fixed; |
421 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); | 425 | kvm_mmu_audit(vcpu, "post page fault (fixed)"); |
422 | spin_unlock(&vcpu->kvm->mmu_lock); | 426 | spin_unlock(&vcpu->kvm->mmu_lock); |
423 | up_read(¤t->mm->mmap_sem); | 427 | up_read(&vcpu->kvm->slots_lock); |
424 | 428 | ||
425 | return write_pt; | 429 | return write_pt; |
426 | } | 430 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index de755cb1431d..1a582f1090e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -792,6 +792,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
792 | vcpu->arch.cr0 = cr0; | 792 | vcpu->arch.cr0 = cr0; |
793 | cr0 |= X86_CR0_PG | X86_CR0_WP; | 793 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
794 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 794 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
795 | if (!vcpu->fpu_active) { | ||
796 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
797 | cr0 |= X86_CR0_TS; | ||
798 | } | ||
795 | svm->vmcb->save.cr0 = cr0; | 799 | svm->vmcb->save.cr0 = cr0; |
796 | } | 800 | } |
797 | 801 | ||
@@ -1096,6 +1100,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
1096 | case MSR_IA32_SYSENTER_ESP: | 1100 | case MSR_IA32_SYSENTER_ESP: |
1097 | *data = svm->vmcb->save.sysenter_esp; | 1101 | *data = svm->vmcb->save.sysenter_esp; |
1098 | break; | 1102 | break; |
1103 | /* Nobody will change the following 5 values in the VMCB so | ||
1104 | we can safely return them on rdmsr. They will always be 0 | ||
1105 | until LBRV is implemented. */ | ||
1106 | case MSR_IA32_DEBUGCTLMSR: | ||
1107 | *data = svm->vmcb->save.dbgctl; | ||
1108 | break; | ||
1109 | case MSR_IA32_LASTBRANCHFROMIP: | ||
1110 | *data = svm->vmcb->save.br_from; | ||
1111 | break; | ||
1112 | case MSR_IA32_LASTBRANCHTOIP: | ||
1113 | *data = svm->vmcb->save.br_to; | ||
1114 | break; | ||
1115 | case MSR_IA32_LASTINTFROMIP: | ||
1116 | *data = svm->vmcb->save.last_excp_from; | ||
1117 | break; | ||
1118 | case MSR_IA32_LASTINTTOIP: | ||
1119 | *data = svm->vmcb->save.last_excp_to; | ||
1120 | break; | ||
1099 | default: | 1121 | default: |
1100 | return kvm_get_msr_common(vcpu, ecx, data); | 1122 | return kvm_get_msr_common(vcpu, ecx, data); |
1101 | } | 1123 | } |
@@ -1156,6 +1178,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
1156 | case MSR_IA32_SYSENTER_ESP: | 1178 | case MSR_IA32_SYSENTER_ESP: |
1157 | svm->vmcb->save.sysenter_esp = data; | 1179 | svm->vmcb->save.sysenter_esp = data; |
1158 | break; | 1180 | break; |
1181 | case MSR_IA32_DEBUGCTLMSR: | ||
1182 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | ||
1183 | __FUNCTION__, data); | ||
1184 | break; | ||
1159 | case MSR_K7_EVNTSEL0: | 1185 | case MSR_K7_EVNTSEL0: |
1160 | case MSR_K7_EVNTSEL1: | 1186 | case MSR_K7_EVNTSEL1: |
1161 | case MSR_K7_EVNTSEL2: | 1187 | case MSR_K7_EVNTSEL2: |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad36447e696e..94ea724638fd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -638,6 +638,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
638 | { | 638 | { |
639 | int save_nmsrs; | 639 | int save_nmsrs; |
640 | 640 | ||
641 | vmx_load_host_state(vmx); | ||
641 | save_nmsrs = 0; | 642 | save_nmsrs = 0; |
642 | #ifdef CONFIG_X86_64 | 643 | #ifdef CONFIG_X86_64 |
643 | if (is_long_mode(&vmx->vcpu)) { | 644 | if (is_long_mode(&vmx->vcpu)) { |
@@ -1477,7 +1478,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1477 | struct kvm_userspace_memory_region kvm_userspace_mem; | 1478 | struct kvm_userspace_memory_region kvm_userspace_mem; |
1478 | int r = 0; | 1479 | int r = 0; |
1479 | 1480 | ||
1480 | down_write(¤t->mm->mmap_sem); | 1481 | down_write(&kvm->slots_lock); |
1481 | if (kvm->arch.apic_access_page) | 1482 | if (kvm->arch.apic_access_page) |
1482 | goto out; | 1483 | goto out; |
1483 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 1484 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -1487,9 +1488,12 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
1487 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); | 1488 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); |
1488 | if (r) | 1489 | if (r) |
1489 | goto out; | 1490 | goto out; |
1491 | |||
1492 | down_read(¤t->mm->mmap_sem); | ||
1490 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 1493 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
1494 | up_read(¤t->mm->mmap_sem); | ||
1491 | out: | 1495 | out: |
1492 | up_write(¤t->mm->mmap_sem); | 1496 | up_write(&kvm->slots_lock); |
1493 | return r; | 1497 | return r; |
1494 | } | 1498 | } |
1495 | 1499 | ||
@@ -1602,9 +1606,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1602 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 1606 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
1603 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 1607 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); |
1604 | 1608 | ||
1605 | if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) | ||
1606 | if (alloc_apic_access_page(vmx->vcpu.kvm) != 0) | ||
1607 | return -ENOMEM; | ||
1608 | 1609 | ||
1609 | return 0; | 1610 | return 0; |
1610 | } | 1611 | } |
@@ -2534,6 +2535,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2534 | put_cpu(); | 2535 | put_cpu(); |
2535 | if (err) | 2536 | if (err) |
2536 | goto free_vmcs; | 2537 | goto free_vmcs; |
2538 | if (vm_need_virtualize_apic_accesses(kvm)) | ||
2539 | if (alloc_apic_access_page(kvm) != 0) | ||
2540 | goto free_vmcs; | ||
2537 | 2541 | ||
2538 | return &vmx->vcpu; | 2542 | return &vmx->vcpu; |
2539 | 2543 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf5308148689..6b01552bd1f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -46,6 +46,9 @@ | |||
46 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 46 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
47 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 47 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
48 | 48 | ||
49 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | ||
50 | struct kvm_cpuid_entry2 __user *entries); | ||
51 | |||
49 | struct kvm_x86_ops *kvm_x86_ops; | 52 | struct kvm_x86_ops *kvm_x86_ops; |
50 | 53 | ||
51 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 54 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
@@ -181,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
181 | int ret; | 184 | int ret; |
182 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; | 185 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; |
183 | 186 | ||
184 | down_read(¤t->mm->mmap_sem); | 187 | down_read(&vcpu->kvm->slots_lock); |
185 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, | 188 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, |
186 | offset * sizeof(u64), sizeof(pdpte)); | 189 | offset * sizeof(u64), sizeof(pdpte)); |
187 | if (ret < 0) { | 190 | if (ret < 0) { |
@@ -198,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
198 | 201 | ||
199 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); | 202 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); |
200 | out: | 203 | out: |
201 | up_read(¤t->mm->mmap_sem); | 204 | up_read(&vcpu->kvm->slots_lock); |
202 | 205 | ||
203 | return ret; | 206 | return ret; |
204 | } | 207 | } |
@@ -212,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
212 | if (is_long_mode(vcpu) || !is_pae(vcpu)) | 215 | if (is_long_mode(vcpu) || !is_pae(vcpu)) |
213 | return false; | 216 | return false; |
214 | 217 | ||
215 | down_read(¤t->mm->mmap_sem); | 218 | down_read(&vcpu->kvm->slots_lock); |
216 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); | 219 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); |
217 | if (r < 0) | 220 | if (r < 0) |
218 | goto out; | 221 | goto out; |
219 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; | 222 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; |
220 | out: | 223 | out: |
221 | up_read(¤t->mm->mmap_sem); | 224 | up_read(&vcpu->kvm->slots_lock); |
222 | 225 | ||
223 | return changed; | 226 | return changed; |
224 | } | 227 | } |
@@ -356,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
356 | */ | 359 | */ |
357 | } | 360 | } |
358 | 361 | ||
359 | down_read(¤t->mm->mmap_sem); | 362 | down_read(&vcpu->kvm->slots_lock); |
360 | /* | 363 | /* |
361 | * Does the new cr3 value map to physical memory? (Note, we | 364 | * Does the new cr3 value map to physical memory? (Note, we |
362 | * catch an invalid cr3 even in real-mode, because it would | 365 | * catch an invalid cr3 even in real-mode, because it would |
@@ -372,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
372 | vcpu->arch.cr3 = cr3; | 375 | vcpu->arch.cr3 = cr3; |
373 | vcpu->arch.mmu.new_cr3(vcpu); | 376 | vcpu->arch.mmu.new_cr3(vcpu); |
374 | } | 377 | } |
375 | up_read(¤t->mm->mmap_sem); | 378 | up_read(&vcpu->kvm->slots_lock); |
376 | } | 379 | } |
377 | EXPORT_SYMBOL_GPL(set_cr3); | 380 | EXPORT_SYMBOL_GPL(set_cr3); |
378 | 381 | ||
@@ -484,6 +487,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
484 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", | 487 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", |
485 | __FUNCTION__, data); | 488 | __FUNCTION__, data); |
486 | break; | 489 | break; |
490 | case MSR_IA32_MCG_CTL: | ||
491 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", | ||
492 | __FUNCTION__, data); | ||
493 | break; | ||
487 | case MSR_IA32_UCODE_REV: | 494 | case MSR_IA32_UCODE_REV: |
488 | case MSR_IA32_UCODE_WRITE: | 495 | case MSR_IA32_UCODE_WRITE: |
489 | case 0x200 ... 0x2ff: /* MTRRs */ | 496 | case 0x200 ... 0x2ff: /* MTRRs */ |
@@ -526,6 +533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
526 | case MSR_IA32_MC0_CTL: | 533 | case MSR_IA32_MC0_CTL: |
527 | case MSR_IA32_MCG_STATUS: | 534 | case MSR_IA32_MCG_STATUS: |
528 | case MSR_IA32_MCG_CAP: | 535 | case MSR_IA32_MCG_CAP: |
536 | case MSR_IA32_MCG_CTL: | ||
529 | case MSR_IA32_MC0_MISC: | 537 | case MSR_IA32_MC0_MISC: |
530 | case MSR_IA32_MC0_MISC+4: | 538 | case MSR_IA32_MC0_MISC+4: |
531 | case MSR_IA32_MC0_MISC+8: | 539 | case MSR_IA32_MC0_MISC+8: |
@@ -727,6 +735,24 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
727 | r = 0; | 735 | r = 0; |
728 | break; | 736 | break; |
729 | } | 737 | } |
738 | case KVM_GET_SUPPORTED_CPUID: { | ||
739 | struct kvm_cpuid2 __user *cpuid_arg = argp; | ||
740 | struct kvm_cpuid2 cpuid; | ||
741 | |||
742 | r = -EFAULT; | ||
743 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | ||
744 | goto out; | ||
745 | r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, | ||
746 | cpuid_arg->entries); | ||
747 | if (r) | ||
748 | goto out; | ||
749 | |||
750 | r = -EFAULT; | ||
751 | if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) | ||
752 | goto out; | ||
753 | r = 0; | ||
754 | break; | ||
755 | } | ||
730 | default: | 756 | default: |
731 | r = -EINVAL; | 757 | r = -EINVAL; |
732 | } | 758 | } |
@@ -974,8 +1000,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
974 | put_cpu(); | 1000 | put_cpu(); |
975 | } | 1001 | } |
976 | 1002 | ||
977 | static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, | 1003 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, |
978 | struct kvm_cpuid2 *cpuid, | ||
979 | struct kvm_cpuid_entry2 __user *entries) | 1004 | struct kvm_cpuid_entry2 __user *entries) |
980 | { | 1005 | { |
981 | struct kvm_cpuid_entry2 *cpuid_entries; | 1006 | struct kvm_cpuid_entry2 *cpuid_entries; |
@@ -1207,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
1207 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 1232 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
1208 | return -EINVAL; | 1233 | return -EINVAL; |
1209 | 1234 | ||
1210 | down_write(¤t->mm->mmap_sem); | 1235 | down_write(&kvm->slots_lock); |
1211 | 1236 | ||
1212 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 1237 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
1213 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 1238 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
1214 | 1239 | ||
1215 | up_write(¤t->mm->mmap_sem); | 1240 | up_write(&kvm->slots_lock); |
1216 | return 0; | 1241 | return 0; |
1217 | } | 1242 | } |
1218 | 1243 | ||
@@ -1261,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1261 | < alias->target_phys_addr) | 1286 | < alias->target_phys_addr) |
1262 | goto out; | 1287 | goto out; |
1263 | 1288 | ||
1264 | down_write(¤t->mm->mmap_sem); | 1289 | down_write(&kvm->slots_lock); |
1265 | 1290 | ||
1266 | p = &kvm->arch.aliases[alias->slot]; | 1291 | p = &kvm->arch.aliases[alias->slot]; |
1267 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1292 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -1275,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1275 | 1300 | ||
1276 | kvm_mmu_zap_all(kvm); | 1301 | kvm_mmu_zap_all(kvm); |
1277 | 1302 | ||
1278 | up_write(¤t->mm->mmap_sem); | 1303 | up_write(&kvm->slots_lock); |
1279 | 1304 | ||
1280 | return 0; | 1305 | return 0; |
1281 | 1306 | ||
@@ -1351,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1351 | struct kvm_memory_slot *memslot; | 1376 | struct kvm_memory_slot *memslot; |
1352 | int is_dirty = 0; | 1377 | int is_dirty = 0; |
1353 | 1378 | ||
1354 | down_write(¤t->mm->mmap_sem); | 1379 | down_write(&kvm->slots_lock); |
1355 | 1380 | ||
1356 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 1381 | r = kvm_get_dirty_log(kvm, log, &is_dirty); |
1357 | if (r) | 1382 | if (r) |
@@ -1367,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1367 | } | 1392 | } |
1368 | r = 0; | 1393 | r = 0; |
1369 | out: | 1394 | out: |
1370 | up_write(¤t->mm->mmap_sem); | 1395 | up_write(&kvm->slots_lock); |
1371 | return r; | 1396 | return r; |
1372 | } | 1397 | } |
1373 | 1398 | ||
@@ -1487,24 +1512,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
1487 | r = 0; | 1512 | r = 0; |
1488 | break; | 1513 | break; |
1489 | } | 1514 | } |
1490 | case KVM_GET_SUPPORTED_CPUID: { | ||
1491 | struct kvm_cpuid2 __user *cpuid_arg = argp; | ||
1492 | struct kvm_cpuid2 cpuid; | ||
1493 | |||
1494 | r = -EFAULT; | ||
1495 | if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) | ||
1496 | goto out; | ||
1497 | r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, | ||
1498 | cpuid_arg->entries); | ||
1499 | if (r) | ||
1500 | goto out; | ||
1501 | |||
1502 | r = -EFAULT; | ||
1503 | if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) | ||
1504 | goto out; | ||
1505 | r = 0; | ||
1506 | break; | ||
1507 | } | ||
1508 | default: | 1515 | default: |
1509 | ; | 1516 | ; |
1510 | } | 1517 | } |
@@ -1563,7 +1570,7 @@ int emulator_read_std(unsigned long addr, | |||
1563 | void *data = val; | 1570 | void *data = val; |
1564 | int r = X86EMUL_CONTINUE; | 1571 | int r = X86EMUL_CONTINUE; |
1565 | 1572 | ||
1566 | down_read(¤t->mm->mmap_sem); | 1573 | down_read(&vcpu->kvm->slots_lock); |
1567 | while (bytes) { | 1574 | while (bytes) { |
1568 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1575 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1569 | unsigned offset = addr & (PAGE_SIZE-1); | 1576 | unsigned offset = addr & (PAGE_SIZE-1); |
@@ -1585,7 +1592,7 @@ int emulator_read_std(unsigned long addr, | |||
1585 | addr += tocopy; | 1592 | addr += tocopy; |
1586 | } | 1593 | } |
1587 | out: | 1594 | out: |
1588 | up_read(¤t->mm->mmap_sem); | 1595 | up_read(&vcpu->kvm->slots_lock); |
1589 | return r; | 1596 | return r; |
1590 | } | 1597 | } |
1591 | EXPORT_SYMBOL_GPL(emulator_read_std); | 1598 | EXPORT_SYMBOL_GPL(emulator_read_std); |
@@ -1604,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr, | |||
1604 | return X86EMUL_CONTINUE; | 1611 | return X86EMUL_CONTINUE; |
1605 | } | 1612 | } |
1606 | 1613 | ||
1607 | down_read(¤t->mm->mmap_sem); | 1614 | down_read(&vcpu->kvm->slots_lock); |
1608 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1615 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1609 | up_read(¤t->mm->mmap_sem); | 1616 | up_read(&vcpu->kvm->slots_lock); |
1610 | 1617 | ||
1611 | /* For APIC access vmexit */ | 1618 | /* For APIC access vmexit */ |
1612 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 1619 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -1644,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1644 | { | 1651 | { |
1645 | int ret; | 1652 | int ret; |
1646 | 1653 | ||
1647 | down_read(¤t->mm->mmap_sem); | 1654 | down_read(&vcpu->kvm->slots_lock); |
1648 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 1655 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
1649 | if (ret < 0) { | 1656 | if (ret < 0) { |
1650 | up_read(¤t->mm->mmap_sem); | 1657 | up_read(&vcpu->kvm->slots_lock); |
1651 | return 0; | 1658 | return 0; |
1652 | } | 1659 | } |
1653 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 1660 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); |
1654 | up_read(¤t->mm->mmap_sem); | 1661 | up_read(&vcpu->kvm->slots_lock); |
1655 | return 1; | 1662 | return 1; |
1656 | } | 1663 | } |
1657 | 1664 | ||
@@ -1663,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
1663 | struct kvm_io_device *mmio_dev; | 1670 | struct kvm_io_device *mmio_dev; |
1664 | gpa_t gpa; | 1671 | gpa_t gpa; |
1665 | 1672 | ||
1666 | down_read(¤t->mm->mmap_sem); | 1673 | down_read(&vcpu->kvm->slots_lock); |
1667 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1674 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1668 | up_read(¤t->mm->mmap_sem); | 1675 | up_read(&vcpu->kvm->slots_lock); |
1669 | 1676 | ||
1670 | if (gpa == UNMAPPED_GVA) { | 1677 | if (gpa == UNMAPPED_GVA) { |
1671 | kvm_inject_page_fault(vcpu, addr, 2); | 1678 | kvm_inject_page_fault(vcpu, addr, 2); |
@@ -1742,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1742 | char *kaddr; | 1749 | char *kaddr; |
1743 | u64 val; | 1750 | u64 val; |
1744 | 1751 | ||
1745 | down_read(¤t->mm->mmap_sem); | 1752 | down_read(&vcpu->kvm->slots_lock); |
1746 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 1753 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); |
1747 | 1754 | ||
1748 | if (gpa == UNMAPPED_GVA || | 1755 | if (gpa == UNMAPPED_GVA || |
@@ -1753,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
1753 | goto emul_write; | 1760 | goto emul_write; |
1754 | 1761 | ||
1755 | val = *(u64 *)new; | 1762 | val = *(u64 *)new; |
1763 | |||
1764 | down_read(¤t->mm->mmap_sem); | ||
1756 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1765 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
1766 | up_read(¤t->mm->mmap_sem); | ||
1767 | |||
1757 | kaddr = kmap_atomic(page, KM_USER0); | 1768 | kaddr = kmap_atomic(page, KM_USER0); |
1758 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 1769 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); |
1759 | kunmap_atomic(kaddr, KM_USER0); | 1770 | kunmap_atomic(kaddr, KM_USER0); |
1760 | kvm_release_page_dirty(page); | 1771 | kvm_release_page_dirty(page); |
1761 | emul_write: | 1772 | emul_write: |
1762 | up_read(¤t->mm->mmap_sem); | 1773 | up_read(&vcpu->kvm->slots_lock); |
1763 | } | 1774 | } |
1764 | #endif | 1775 | #endif |
1765 | 1776 | ||
@@ -2152,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2152 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2163 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2153 | 2164 | ||
2154 | for (i = 0; i < nr_pages; ++i) { | 2165 | for (i = 0; i < nr_pages; ++i) { |
2155 | down_read(¤t->mm->mmap_sem); | 2166 | down_read(&vcpu->kvm->slots_lock); |
2156 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | 2167 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); |
2157 | vcpu->arch.pio.guest_pages[i] = page; | 2168 | vcpu->arch.pio.guest_pages[i] = page; |
2158 | up_read(¤t->mm->mmap_sem); | 2169 | up_read(&vcpu->kvm->slots_lock); |
2159 | if (!page) { | 2170 | if (!page) { |
2160 | kvm_inject_gp(vcpu, 0); | 2171 | kvm_inject_gp(vcpu, 0); |
2161 | free_pio_guest_pages(vcpu); | 2172 | free_pio_guest_pages(vcpu); |
@@ -2478,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
2478 | 2489 | ||
2479 | down_read(¤t->mm->mmap_sem); | 2490 | down_read(¤t->mm->mmap_sem); |
2480 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 2491 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
2481 | vcpu->arch.apic->vapic_page = page; | ||
2482 | up_read(¤t->mm->mmap_sem); | 2492 | up_read(¤t->mm->mmap_sem); |
2493 | |||
2494 | vcpu->arch.apic->vapic_page = page; | ||
2483 | } | 2495 | } |
2484 | 2496 | ||
2485 | static void vapic_exit(struct kvm_vcpu *vcpu) | 2497 | static void vapic_exit(struct kvm_vcpu *vcpu) |
@@ -2861,8 +2873,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
2861 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 2873 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2862 | 2874 | ||
2863 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; | 2875 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; |
2864 | vcpu->arch.cr0 = sregs->cr0; | ||
2865 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 2876 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
2877 | vcpu->arch.cr0 = sregs->cr0; | ||
2866 | 2878 | ||
2867 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 2879 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; |
2868 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 2880 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
@@ -2952,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
2952 | gpa_t gpa; | 2964 | gpa_t gpa; |
2953 | 2965 | ||
2954 | vcpu_load(vcpu); | 2966 | vcpu_load(vcpu); |
2955 | down_read(¤t->mm->mmap_sem); | 2967 | down_read(&vcpu->kvm->slots_lock); |
2956 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 2968 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); |
2957 | up_read(¤t->mm->mmap_sem); | 2969 | up_read(&vcpu->kvm->slots_lock); |
2958 | tr->physical_address = gpa; | 2970 | tr->physical_address = gpa; |
2959 | tr->valid = gpa != UNMAPPED_GVA; | 2971 | tr->valid = gpa != UNMAPPED_GVA; |
2960 | tr->writeable = 1; | 2972 | tr->writeable = 1; |
@@ -3227,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3227 | */ | 3239 | */ |
3228 | if (!user_alloc) { | 3240 | if (!user_alloc) { |
3229 | if (npages && !old.rmap) { | 3241 | if (npages && !old.rmap) { |
3242 | down_write(¤t->mm->mmap_sem); | ||
3230 | memslot->userspace_addr = do_mmap(NULL, 0, | 3243 | memslot->userspace_addr = do_mmap(NULL, 0, |
3231 | npages * PAGE_SIZE, | 3244 | npages * PAGE_SIZE, |
3232 | PROT_READ | PROT_WRITE, | 3245 | PROT_READ | PROT_WRITE, |
3233 | MAP_SHARED | MAP_ANONYMOUS, | 3246 | MAP_SHARED | MAP_ANONYMOUS, |
3234 | 0); | 3247 | 0); |
3248 | up_write(¤t->mm->mmap_sem); | ||
3235 | 3249 | ||
3236 | if (IS_ERR((void *)memslot->userspace_addr)) | 3250 | if (IS_ERR((void *)memslot->userspace_addr)) |
3237 | return PTR_ERR((void *)memslot->userspace_addr); | 3251 | return PTR_ERR((void *)memslot->userspace_addr); |
@@ -3239,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3239 | if (!old.user_alloc && old.rmap) { | 3253 | if (!old.user_alloc && old.rmap) { |
3240 | int ret; | 3254 | int ret; |
3241 | 3255 | ||
3256 | down_write(¤t->mm->mmap_sem); | ||
3242 | ret = do_munmap(current->mm, old.userspace_addr, | 3257 | ret = do_munmap(current->mm, old.userspace_addr, |
3243 | old.npages * PAGE_SIZE); | 3258 | old.npages * PAGE_SIZE); |
3259 | up_write(¤t->mm->mmap_sem); | ||
3244 | if (ret < 0) | 3260 | if (ret < 0) |
3245 | printk(KERN_WARNING | 3261 | printk(KERN_WARNING |
3246 | "kvm_vm_ioctl_set_memory_region: " | 3262 | "kvm_vm_ioctl_set_memory_region: " |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5afdde4895dc..a104c532ff70 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/lguest_launcher.h> | 57 | #include <linux/lguest_launcher.h> |
58 | #include <linux/virtio_console.h> | 58 | #include <linux/virtio_console.h> |
59 | #include <linux/pm.h> | 59 | #include <linux/pm.h> |
60 | #include <asm/lguest.h> | ||
60 | #include <asm/paravirt.h> | 61 | #include <asm/paravirt.h> |
61 | #include <asm/param.h> | 62 | #include <asm/param.h> |
62 | #include <asm/page.h> | 63 | #include <asm/page.h> |
@@ -75,15 +76,6 @@ | |||
75 | * behaving in simplified but equivalent ways. In particular, the Guest is the | 76 | * behaving in simplified but equivalent ways. In particular, the Guest is the |
76 | * same kernel as the Host (or at least, built from the same source code). :*/ | 77 | * same kernel as the Host (or at least, built from the same source code). :*/ |
77 | 78 | ||
78 | /* Declarations for definitions in lguest_guest.S */ | ||
79 | extern char lguest_noirq_start[], lguest_noirq_end[]; | ||
80 | extern const char lgstart_cli[], lgend_cli[]; | ||
81 | extern const char lgstart_sti[], lgend_sti[]; | ||
82 | extern const char lgstart_popf[], lgend_popf[]; | ||
83 | extern const char lgstart_pushf[], lgend_pushf[]; | ||
84 | extern const char lgstart_iret[], lgend_iret[]; | ||
85 | extern void lguest_iret(void); | ||
86 | |||
87 | struct lguest_data lguest_data = { | 79 | struct lguest_data lguest_data = { |
88 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, | 80 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, |
89 | .noirq_start = (u32)lguest_noirq_start, | 81 | .noirq_start = (u32)lguest_noirq_start, |
@@ -92,7 +84,6 @@ struct lguest_data lguest_data = { | |||
92 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ | 84 | .blocked_interrupts = { 1 }, /* Block timer interrupts */ |
93 | .syscall_vec = SYSCALL_VECTOR, | 85 | .syscall_vec = SYSCALL_VECTOR, |
94 | }; | 86 | }; |
95 | static cycle_t clock_base; | ||
96 | 87 | ||
97 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a |
98 | * ring buffer of stored hypercalls which the Host will run though next time we | 89 | * ring buffer of stored hypercalls which the Host will run though next time we |
@@ -335,8 +326,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
335 | case 1: /* Basic feature request. */ | 326 | case 1: /* Basic feature request. */ |
336 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 327 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
337 | *cx &= 0x00002201; | 328 | *cx &= 0x00002201; |
338 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ | 329 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ |
339 | *dx &= 0x07808101; | 330 | *dx &= 0x07808111; |
340 | /* The Host can do a nice optimization if it knows that the | 331 | /* The Host can do a nice optimization if it knows that the |
341 | * kernel mappings (addresses above 0xC0000000 or whatever | 332 | * kernel mappings (addresses above 0xC0000000 or whatever |
342 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 333 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
@@ -603,19 +594,25 @@ static unsigned long lguest_get_wallclock(void) | |||
603 | return lguest_data.time.tv_sec; | 594 | return lguest_data.time.tv_sec; |
604 | } | 595 | } |
605 | 596 | ||
597 | /* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at, | ||
598 | * or 0 if it's unusable as a reliable clock source. This matches what we want | ||
599 | * here: if we return 0 from this function, the x86 TSC clock will not register | ||
600 | * itself. */ | ||
601 | static unsigned long lguest_cpu_khz(void) | ||
602 | { | ||
603 | return lguest_data.tsc_khz; | ||
604 | } | ||
605 | |||
606 | /* If we can't use the TSC, the kernel falls back to our "lguest_clock", where | ||
607 | * we read the time value given to us by the Host. */ | ||
606 | static cycle_t lguest_clock_read(void) | 608 | static cycle_t lguest_clock_read(void) |
607 | { | 609 | { |
608 | unsigned long sec, nsec; | 610 | unsigned long sec, nsec; |
609 | 611 | ||
610 | /* If the Host tells the TSC speed, we can trust that. */ | 612 | /* Since the time is in two parts (seconds and nanoseconds), we risk |
611 | if (lguest_data.tsc_khz) | 613 | * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, |
612 | return native_read_tsc(); | 614 | * and getting 99 and 0. As Linux tends to come apart under the stress |
613 | 615 | * of time travel, we must be careful: */ | |
614 | /* If we can't use the TSC, we read the time value written by the Host. | ||
615 | * Since it's in two parts (seconds and nanoseconds), we risk reading | ||
616 | * it just as it's changing from 99 & 0.999999999 to 100 and 0, and | ||
617 | * getting 99 and 0. As Linux tends to come apart under the stress of | ||
618 | * time travel, we must be careful: */ | ||
619 | do { | 616 | do { |
620 | /* First we read the seconds part. */ | 617 | /* First we read the seconds part. */ |
621 | sec = lguest_data.time.tv_sec; | 618 | sec = lguest_data.time.tv_sec; |
@@ -630,14 +627,14 @@ static cycle_t lguest_clock_read(void) | |||
630 | /* Now if the seconds part has changed, try again. */ | 627 | /* Now if the seconds part has changed, try again. */ |
631 | } while (unlikely(lguest_data.time.tv_sec != sec)); | 628 | } while (unlikely(lguest_data.time.tv_sec != sec)); |
632 | 629 | ||
633 | /* Our non-TSC clock is in real nanoseconds. */ | 630 | /* Our lguest clock is in real nanoseconds. */ |
634 | return sec*1000000000ULL + nsec; | 631 | return sec*1000000000ULL + nsec; |
635 | } | 632 | } |
636 | 633 | ||
637 | /* This is what we tell the kernel is our clocksource. */ | 634 | /* This is the fallback clocksource: lower priority than the TSC clocksource. */ |
638 | static struct clocksource lguest_clock = { | 635 | static struct clocksource lguest_clock = { |
639 | .name = "lguest", | 636 | .name = "lguest", |
640 | .rating = 400, | 637 | .rating = 200, |
641 | .read = lguest_clock_read, | 638 | .read = lguest_clock_read, |
642 | .mask = CLOCKSOURCE_MASK(64), | 639 | .mask = CLOCKSOURCE_MASK(64), |
643 | .mult = 1 << 22, | 640 | .mult = 1 << 22, |
@@ -645,12 +642,6 @@ static struct clocksource lguest_clock = { | |||
645 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 642 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
646 | }; | 643 | }; |
647 | 644 | ||
648 | /* The "scheduler clock" is just our real clock, adjusted to start at zero */ | ||
649 | static unsigned long long lguest_sched_clock(void) | ||
650 | { | ||
651 | return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); | ||
652 | } | ||
653 | |||
654 | /* We also need a "struct clock_event_device": Linux asks us to set it to go | 645 | /* We also need a "struct clock_event_device": Linux asks us to set it to go |
655 | * off some time in the future. Actually, James Morris figured all this out, I | 646 | * off some time in the future. Actually, James Morris figured all this out, I |
656 | * just applied the patch. */ | 647 | * just applied the patch. */ |
@@ -720,19 +711,8 @@ static void lguest_time_init(void) | |||
720 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 711 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
721 | set_irq_handler(0, lguest_time_irq); | 712 | set_irq_handler(0, lguest_time_irq); |
722 | 713 | ||
723 | /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can | ||
724 | * use the TSC, otherwise it's a dumb nanosecond-resolution clock. | ||
725 | * Either way, the "rating" is set so high that it's always chosen over | ||
726 | * any other clocksource. */ | ||
727 | if (lguest_data.tsc_khz) | ||
728 | lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, | ||
729 | lguest_clock.shift); | ||
730 | clock_base = lguest_clock_read(); | ||
731 | clocksource_register(&lguest_clock); | 714 | clocksource_register(&lguest_clock); |
732 | 715 | ||
733 | /* Now we've set up our clock, we can use it as the scheduler clock */ | ||
734 | pv_time_ops.sched_clock = lguest_sched_clock; | ||
735 | |||
736 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 716 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
737 | * here and register our timer device. */ | 717 | * here and register our timer device. */ |
738 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 718 | lguest_clockevent.cpumask = cpumask_of_cpu(0); |
@@ -1003,6 +983,7 @@ __init void lguest_init(void) | |||
1003 | /* time operations */ | 983 | /* time operations */ |
1004 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 984 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1005 | pv_time_ops.time_init = lguest_time_init; | 985 | pv_time_ops.time_init = lguest_time_init; |
986 | pv_time_ops.get_cpu_khz = lguest_cpu_khz; | ||
1006 | 987 | ||
1007 | /* Now is a good time to look at the implementations of these functions | 988 | /* Now is a good time to look at the implementations of these functions |
1008 | * before returning to the rest of lguest_init(). */ | 989 | * before returning to the rest of lguest_init(). */ |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb652f5a93fb..a02a14f0f324 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) | |||
172 | } | 172 | } |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * The head.S code sets up the kernel high mapping from: | 175 | * The head.S code sets up the kernel high mapping: |
176 | * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE | 176 | * |
177 | * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) | ||
177 | * | 178 | * |
178 | * phys_addr holds the negative offset to the kernel, which is added | 179 | * phys_addr holds the negative offset to the kernel, which is added |
179 | * to the compile time generated pmds. This results in invalid pmds up | 180 | * to the compile time generated pmds. This results in invalid pmds up |
@@ -515,14 +516,6 @@ void __init mem_init(void) | |||
515 | 516 | ||
516 | /* clear_bss() already clear the empty_zero_page */ | 517 | /* clear_bss() already clear the empty_zero_page */ |
517 | 518 | ||
518 | /* temporary debugging - double check it's true: */ | ||
519 | { | ||
520 | int i; | ||
521 | |||
522 | for (i = 0; i < 1024; i++) | ||
523 | WARN_ON_ONCE(empty_zero_page[i]); | ||
524 | } | ||
525 | |||
526 | reservedpages = 0; | 519 | reservedpages = 0; |
527 | 520 | ||
528 | /* this will put all low memory onto the freelists */ | 521 | /* this will put all low memory onto the freelists */ |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 882328efc3db..8fe576baa148 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -134,8 +134,6 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, | |||
134 | return NULL; | 134 | return NULL; |
135 | } | 135 | } |
136 | 136 | ||
137 | WARN_ON_ONCE(page_is_ram(pfn)); | ||
138 | |||
139 | switch (mode) { | 137 | switch (mode) { |
140 | case IOR_MODE_UNCACHED: | 138 | case IOR_MODE_UNCACHED: |
141 | default: | 139 | default: |
@@ -162,7 +160,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, | |||
162 | area->phys_addr = phys_addr; | 160 | area->phys_addr = phys_addr; |
163 | vaddr = (unsigned long) area->addr; | 161 | vaddr = (unsigned long) area->addr; |
164 | if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { | 162 | if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { |
165 | remove_vm_area((void *)(vaddr & PAGE_MASK)); | 163 | free_vm_area(area); |
166 | return NULL; | 164 | return NULL; |
167 | } | 165 | } |
168 | 166 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 59898fb0a4aa..8ccfee10f5b5 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -622,13 +622,17 @@ void __init init_cpu_to_node(void) | |||
622 | int i; | 622 | int i; |
623 | 623 | ||
624 | for (i = 0; i < NR_CPUS; i++) { | 624 | for (i = 0; i < NR_CPUS; i++) { |
625 | int node; | ||
625 | u16 apicid = x86_cpu_to_apicid_init[i]; | 626 | u16 apicid = x86_cpu_to_apicid_init[i]; |
626 | 627 | ||
627 | if (apicid == BAD_APICID) | 628 | if (apicid == BAD_APICID) |
628 | continue; | 629 | continue; |
629 | if (apicid_to_node[apicid] == NUMA_NO_NODE) | 630 | node = apicid_to_node[apicid]; |
631 | if (node == NUMA_NO_NODE) | ||
630 | continue; | 632 | continue; |
631 | numa_set_node(i, apicid_to_node[apicid]); | 633 | if (!node_online(node)) |
634 | continue; | ||
635 | numa_set_node(i, node); | ||
632 | } | 636 | } |
633 | } | 637 | } |
634 | 638 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 464d8fc21ce6..14e48b5a94ba 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void) | |||
44 | 44 | ||
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
48 | # define debug_pagealloc 1 | ||
49 | #else | ||
50 | # define debug_pagealloc 0 | ||
51 | #endif | ||
52 | |||
47 | static inline int | 53 | static inline int |
48 | within(unsigned long addr, unsigned long start, unsigned long end) | 54 | within(unsigned long addr, unsigned long start, unsigned long end) |
49 | { | 55 | { |
@@ -355,45 +361,48 @@ out_unlock: | |||
355 | 361 | ||
356 | static LIST_HEAD(page_pool); | 362 | static LIST_HEAD(page_pool); |
357 | static unsigned long pool_size, pool_pages, pool_low; | 363 | static unsigned long pool_size, pool_pages, pool_low; |
358 | static unsigned long pool_used, pool_failed, pool_refill; | 364 | static unsigned long pool_used, pool_failed; |
359 | 365 | ||
360 | static void cpa_fill_pool(void) | 366 | static void cpa_fill_pool(struct page **ret) |
361 | { | 367 | { |
362 | struct page *p; | ||
363 | gfp_t gfp = GFP_KERNEL; | 368 | gfp_t gfp = GFP_KERNEL; |
369 | unsigned long flags; | ||
370 | struct page *p; | ||
364 | 371 | ||
365 | /* Do not allocate from interrupt context */ | ||
366 | if (in_irq() || irqs_disabled()) | ||
367 | return; | ||
368 | /* | 372 | /* |
369 | * Check unlocked. I does not matter when we have one more | 373 | * Avoid recursion (on debug-pagealloc) and also signal |
370 | * page in the pool. The bit lock avoids recursive pool | 374 | * our priority to get to these pagetables: |
371 | * allocations: | ||
372 | */ | 375 | */ |
373 | if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) | 376 | if (current->flags & PF_MEMALLOC) |
374 | return; | 377 | return; |
378 | current->flags |= PF_MEMALLOC; | ||
375 | 379 | ||
376 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
377 | /* | 380 | /* |
378 | * We could do: | 381 | * Allocate atomically from atomic contexts: |
379 | * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL; | ||
380 | * but this fails on !PREEMPT kernels | ||
381 | */ | 382 | */ |
382 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; | 383 | if (in_atomic() || irqs_disabled() || debug_pagealloc) |
383 | #endif | 384 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; |
384 | 385 | ||
385 | while (pool_pages < pool_size) { | 386 | while (pool_pages < pool_size || (ret && !*ret)) { |
386 | p = alloc_pages(gfp, 0); | 387 | p = alloc_pages(gfp, 0); |
387 | if (!p) { | 388 | if (!p) { |
388 | pool_failed++; | 389 | pool_failed++; |
389 | break; | 390 | break; |
390 | } | 391 | } |
391 | spin_lock_irq(&pgd_lock); | 392 | /* |
393 | * If the call site needs a page right now, provide it: | ||
394 | */ | ||
395 | if (ret && !*ret) { | ||
396 | *ret = p; | ||
397 | continue; | ||
398 | } | ||
399 | spin_lock_irqsave(&pgd_lock, flags); | ||
392 | list_add(&p->lru, &page_pool); | 400 | list_add(&p->lru, &page_pool); |
393 | pool_pages++; | 401 | pool_pages++; |
394 | spin_unlock_irq(&pgd_lock); | 402 | spin_unlock_irqrestore(&pgd_lock, flags); |
395 | } | 403 | } |
396 | clear_bit_unlock(0, &pool_refill); | 404 | |
405 | current->flags &= ~PF_MEMALLOC; | ||
397 | } | 406 | } |
398 | 407 | ||
399 | #define SHIFT_MB (20 - PAGE_SHIFT) | 408 | #define SHIFT_MB (20 - PAGE_SHIFT) |
@@ -414,11 +423,15 @@ void __init cpa_init(void) | |||
414 | * GiB. Shift MiB to Gib and multiply the result by | 423 | * GiB. Shift MiB to Gib and multiply the result by |
415 | * POOL_PAGES_PER_GB: | 424 | * POOL_PAGES_PER_GB: |
416 | */ | 425 | */ |
417 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; | 426 | if (debug_pagealloc) { |
418 | pool_size = POOL_PAGES_PER_GB * gb; | 427 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; |
428 | pool_size = POOL_PAGES_PER_GB * gb; | ||
429 | } else { | ||
430 | pool_size = 1; | ||
431 | } | ||
419 | pool_low = pool_size; | 432 | pool_low = pool_size; |
420 | 433 | ||
421 | cpa_fill_pool(); | 434 | cpa_fill_pool(NULL); |
422 | printk(KERN_DEBUG | 435 | printk(KERN_DEBUG |
423 | "CPA: page pool initialized %lu of %lu pages preallocated\n", | 436 | "CPA: page pool initialized %lu of %lu pages preallocated\n", |
424 | pool_pages, pool_size); | 437 | pool_pages, pool_size); |
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
440 | spin_lock_irqsave(&pgd_lock, flags); | 453 | spin_lock_irqsave(&pgd_lock, flags); |
441 | if (list_empty(&page_pool)) { | 454 | if (list_empty(&page_pool)) { |
442 | spin_unlock_irqrestore(&pgd_lock, flags); | 455 | spin_unlock_irqrestore(&pgd_lock, flags); |
443 | return -ENOMEM; | 456 | base = NULL; |
457 | cpa_fill_pool(&base); | ||
458 | if (!base) | ||
459 | return -ENOMEM; | ||
460 | spin_lock_irqsave(&pgd_lock, flags); | ||
461 | } else { | ||
462 | base = list_first_entry(&page_pool, struct page, lru); | ||
463 | list_del(&base->lru); | ||
464 | pool_pages--; | ||
465 | |||
466 | if (pool_pages < pool_low) | ||
467 | pool_low = pool_pages; | ||
444 | } | 468 | } |
445 | 469 | ||
446 | base = list_first_entry(&page_pool, struct page, lru); | ||
447 | list_del(&base->lru); | ||
448 | pool_pages--; | ||
449 | |||
450 | if (pool_pages < pool_low) | ||
451 | pool_low = pool_pages; | ||
452 | |||
453 | /* | 470 | /* |
454 | * Check for races, another CPU might have split this page | 471 | * Check for races, another CPU might have split this page |
455 | * up for us already: | 472 | * up for us already: |
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
734 | cpa_flush_all(cache); | 751 | cpa_flush_all(cache); |
735 | 752 | ||
736 | out: | 753 | out: |
737 | cpa_fill_pool(); | 754 | cpa_fill_pool(NULL); |
755 | |||
738 | return ret; | 756 | return ret; |
739 | } | 757 | } |
740 | 758 | ||
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
897 | * Try to refill the page pool here. We can do this only after | 915 | * Try to refill the page pool here. We can do this only after |
898 | * the tlb flush. | 916 | * the tlb flush. |
899 | */ | 917 | */ |
900 | cpa_fill_pool(); | 918 | cpa_fill_pool(NULL); |
901 | } | 919 | } |
902 | 920 | ||
903 | #ifdef CONFIG_HIBERNATION | 921 | #ifdef CONFIG_HIBERNATION |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 73aba7125203..2f9e9afcb9f4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -342,12 +342,16 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) | |||
342 | 342 | ||
343 | pgd_t *pgd_alloc(struct mm_struct *mm) | 343 | pgd_t *pgd_alloc(struct mm_struct *mm) |
344 | { | 344 | { |
345 | pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); | 345 | pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
346 | 346 | ||
347 | mm->pgd = pgd; /* so that alloc_pd can use it */ | 347 | /* so that alloc_pd can use it */ |
348 | mm->pgd = pgd; | ||
349 | if (pgd) | ||
350 | pgd_ctor(pgd); | ||
348 | 351 | ||
349 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | 352 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { |
350 | quicklist_free(0, pgd_dtor, pgd); | 353 | pgd_dtor(pgd); |
354 | free_page((unsigned long)pgd); | ||
351 | pgd = NULL; | 355 | pgd = NULL; |
352 | } | 356 | } |
353 | 357 | ||
@@ -357,12 +361,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
357 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) | 361 | void pgd_free(struct mm_struct *mm, pgd_t *pgd) |
358 | { | 362 | { |
359 | pgd_mop_up_pmds(mm, pgd); | 363 | pgd_mop_up_pmds(mm, pgd); |
360 | quicklist_free(0, pgd_dtor, pgd); | 364 | pgd_dtor(pgd); |
361 | } | 365 | free_page((unsigned long)pgd); |
362 | |||
363 | void check_pgt_cache(void) | ||
364 | { | ||
365 | quicklist_trim(0, pgd_dtor, 25, 16); | ||
366 | } | 366 | } |
367 | 367 | ||
368 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | 368 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 10ac8c316c46..2f7109ac4c15 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -198,6 +198,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
198 | "b" (bx), | 198 | "b" (bx), |
199 | "D" ((long)reg), | 199 | "D" ((long)reg), |
200 | "S" (&pci_indirect)); | 200 | "S" (&pci_indirect)); |
201 | /* | ||
202 | * Zero-extend the result beyond 8 bits, do not trust the | ||
203 | * BIOS having done it: | ||
204 | */ | ||
205 | *value &= 0xff; | ||
201 | break; | 206 | break; |
202 | case 2: | 207 | case 2: |
203 | __asm__("lcall *(%%esi); cld\n\t" | 208 | __asm__("lcall *(%%esi); cld\n\t" |
@@ -210,6 +215,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
210 | "b" (bx), | 215 | "b" (bx), |
211 | "D" ((long)reg), | 216 | "D" ((long)reg), |
212 | "S" (&pci_indirect)); | 217 | "S" (&pci_indirect)); |
218 | /* | ||
219 | * Zero-extend the result beyond 16 bits, do not trust the | ||
220 | * BIOS having done it: | ||
221 | */ | ||
222 | *value &= 0xffff; | ||
213 | break; | 223 | break; |
214 | case 4: | 224 | case 4: |
215 | __asm__("lcall *(%%esi); cld\n\t" | 225 | __asm__("lcall *(%%esi); cld\n\t" |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index f385a4b4a484..0a8f4742ef51 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -50,7 +50,9 @@ obj-$(VDSO64-y) += vdso-syms.lds | |||
50 | sed-vdsosym := -e 's/^00*/0/' \ | 50 | sed-vdsosym := -e 's/^00*/0/' \ |
51 | -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' | 51 | -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' |
52 | quiet_cmd_vdsosym = VDSOSYM $@ | 52 | quiet_cmd_vdsosym = VDSOSYM $@ |
53 | cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ | 53 | define cmd_vdsosym |
54 | $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ | ||
55 | endef | ||
54 | 56 | ||
55 | $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE | 57 | $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE |
56 | $(call if_changed,vdsosym) | 58 | $(call if_changed,vdsosym) |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 49e5358f481a..8b9ee27805fd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -153,6 +153,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, | |||
153 | if (*ax == 1) | 153 | if (*ax == 1) |
154 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ | 154 | maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ |
155 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ | 155 | (1 << X86_FEATURE_ACPI) | /* disable ACPI */ |
156 | (1 << X86_FEATURE_SEP) | /* disable SEP */ | ||
156 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ | 157 | (1 << X86_FEATURE_ACC)); /* thermal monitoring */ |
157 | 158 | ||
158 | asm(XEN_EMULATE_PREFIX "cpuid" | 159 | asm(XEN_EMULATE_PREFIX "cpuid" |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3bad4773a2f3..2341492bf7a0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -38,7 +38,8 @@ char * __init xen_memory_setup(void) | |||
38 | unsigned long max_pfn = xen_start_info->nr_pages; | 38 | unsigned long max_pfn = xen_start_info->nr_pages; |
39 | 39 | ||
40 | e820.nr_map = 0; | 40 | e820.nr_map = 0; |
41 | add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); | 41 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); |
42 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); | ||
42 | 43 | ||
43 | return "Xen"; | 44 | return "Xen"; |
44 | } | 45 | } |