aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig.cpu14
-rw-r--r--arch/x86/boot/memory.c9
-rw-r--r--arch/x86/kernel/asm-offsets_32.c4
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c9
-rw-r--r--arch/x86/kernel/cpu/transmeta.c7
-rw-r--r--arch/x86/kernel/entry_64.S6
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S22
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/setup_64.c2
-rw-r--r--arch/x86/kernel/smpboot_64.c2
-rw-r--r--arch/x86/kernel/stacktrace.c4
-rw-r--r--arch/x86/kernel/tsc_32.c3
-rw-r--r--arch/x86/kernel/vsyscall_64.c52
-rw-r--r--arch/x86/lguest/boot.c12
-rw-r--r--arch/x86/mm/init_64.c13
-rw-r--r--arch/x86/mm/pageattr.c84
-rw-r--r--arch/x86/vdso/Makefile2
20 files changed, 121 insertions, 138 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b73a1aa..6d50064db182 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,19 @@ config X86_OOSTORE
377 def_bool y 377 def_bool y
378 depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR 378 depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
379 379
380#
381# P6_NOPs are a relatively minor optimization that require a family >=
382# 6 processor, except that it is broken on certain VIA chips.
383# Furthermore, AMD chips prefer a totally different sequence of NOPs
384# (which work on all CPUs). As a result, disallow these if we're
385# compiling X86_GENERIC but not X86_64 (these NOPs do work on all
386# x86-64 capable chips); the list of processors in the right-hand clause
387# are the cores that benefit from this optimization.
388#
389config X86_P6_NOP
390 def_bool y
391 depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
392
380config X86_TSC 393config X86_TSC
381 def_bool y 394 def_bool y
382 depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 395 depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +403,7 @@ config X86_CMOV
390config X86_MINIMUM_CPU_FAMILY 403config X86_MINIMUM_CPU_FAMILY
391 int 404 int
392 default "64" if X86_64 405 default "64" if X86_64
406 default "6" if X86_32 && X86_P6_NOP
393 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) 407 default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
394 default "3" 408 default "3"
395 409
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 378353956b5d..e77d89f9e8aa 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -37,6 +37,12 @@ static int detect_memory_e820(void)
37 "=m" (*desc) 37 "=m" (*desc)
38 : "D" (desc), "d" (SMAP), "a" (0xe820)); 38 : "D" (desc), "d" (SMAP), "a" (0xe820));
39 39
40 /* BIOSes which terminate the chain with CF = 1 as opposed
41 to %ebx = 0 don't always report the SMAP signature on
42 the final, failing, probe. */
43 if (err)
44 break;
45
40 /* Some BIOSes stop returning SMAP in the middle of 46 /* Some BIOSes stop returning SMAP in the middle of
41 the search loop. We don't know exactly how the BIOS 47 the search loop. We don't know exactly how the BIOS
42 screwed up the map at that point, we might have a 48 screwed up the map at that point, we might have a
@@ -47,9 +53,6 @@ static int detect_memory_e820(void)
47 break; 53 break;
48 } 54 }
49 55
50 if (err)
51 break;
52
53 count++; 56 count++;
54 desc++; 57 desc++;
55 } while (next && count < E820MAX); 58 } while (next && count < E820MAX);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index a33d53017997..8ea040124f7d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -128,13 +128,11 @@ void foo(void)
128 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); 128 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
129#endif 129#endif
130 130
131#ifdef CONFIG_LGUEST_GUEST 131#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
132 BLANK(); 132 BLANK();
133 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); 133 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
134 OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); 134 OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
135#endif
136 135
137#ifdef CONFIG_LGUEST
138 BLANK(); 136 BLANK();
139 OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); 137 OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
140 OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); 138 OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f86a3c4a2669..a38aafaefc23 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
504 504
505 /* Clear all flags overriden by options */ 505 /* Clear all flags overriden by options */
506 for (i = 0; i < NCAPINTS; i++) 506 for (i = 0; i < NCAPINTS; i++)
507 c->x86_capability[i] ^= cleared_cpu_caps[i]; 507 c->x86_capability[i] &= ~cleared_cpu_caps[i];
508 508
509 /* Init Machine Check Exception if available. */ 509 /* Init Machine Check Exception if available. */
510 mcheck_init(c); 510 mcheck_init(c);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b6e136f23d3d..be83336fddba 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -43,6 +43,7 @@
43#include <asm/uaccess.h> 43#include <asm/uaccess.h>
44#include <asm/processor.h> 44#include <asm/processor.h>
45#include <asm/msr.h> 45#include <asm/msr.h>
46#include <asm/kvm_para.h>
46#include "mtrr.h" 47#include "mtrr.h"
47 48
48u32 num_var_ranges = 0; 49u32 num_var_ranges = 0;
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void)
649 650
650/** 651/**
651 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 652 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
653 * @end_pfn: ending page frame number
652 * 654 *
653 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain 655 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
654 * memory configurations. This routine checks that the highest MTRR matches 656 * memory configurations. This routine checks that the highest MTRR matches
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
688 690
689 /* kvm/qemu doesn't have mtrr set right, don't trim them all */ 691 /* kvm/qemu doesn't have mtrr set right, don't trim them all */
690 if (!highest_pfn) { 692 if (!highest_pfn) {
691 printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); 693 if (!kvm_para_available()) {
692 WARN_ON(1); 694 printk(KERN_WARNING
695 "WARNING: strange, CPU MTRRs all blank?\n");
696 WARN_ON(1);
697 }
693 return 0; 698 return 0;
694 } 699 }
695 700
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 200fb3f9ebfb..e8b422c1c512 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
76 /* All Transmeta CPUs have a constant TSC */ 76 /* All Transmeta CPUs have a constant TSC */
77 set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); 77 set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
78 78
79 /* If we can run i686 user-space code, call us an i686 */
80#define USER686 ((1 << X86_FEATURE_TSC)|\
81 (1 << X86_FEATURE_CX8)|\
82 (1 << X86_FEATURE_CMOV))
83 if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
84 c->x86 = 6;
85
86#ifdef CONFIG_SYSCTL 79#ifdef CONFIG_SYSCTL
87 /* randomize_va_space slows us down enormously; 80 /* randomize_va_space slows us down enormously;
88 it probably triggers retranslation of x86->native bytecode */ 81 it probably triggers retranslation of x86->native bytecode */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2ad9a1bc6a73..c20c9e7e08dd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@ ENTRY(stub_execve)
453 CFI_REGISTER rip, r11 453 CFI_REGISTER rip, r11
454 SAVE_REST 454 SAVE_REST
455 FIXUP_TOP_OF_STACK %r11 455 FIXUP_TOP_OF_STACK %r11
456 movq %rsp, %rcx
456 call sys_execve 457 call sys_execve
457 RESTORE_TOP_OF_STACK %r11 458 RESTORE_TOP_OF_STACK %r11
458 movq %rax,RAX(%rsp) 459 movq %rax,RAX(%rsp)
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip)
1036 * rdi: name, rsi: argv, rdx: envp 1037 * rdi: name, rsi: argv, rdx: envp
1037 * 1038 *
1038 * We want to fallback into: 1039 * We want to fallback into:
1039 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) 1040 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1040 * 1041 *
1041 * do_sys_execve asm fallback arguments: 1042 * do_sys_execve asm fallback arguments:
1042 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack 1043 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1043 */ 1044 */
1044ENTRY(kernel_execve) 1045ENTRY(kernel_execve)
1045 CFI_STARTPROC 1046 CFI_STARTPROC
1046 FAKE_STACK_FRAME $0 1047 FAKE_STACK_FRAME $0
1047 SAVE_ALL 1048 SAVE_ALL
1049 movq %rsp,%rcx
1048 call sys_execve 1050 call sys_execve
1049 movq %rax, RAX(%rsp) 1051 movq %rax, RAX(%rsp)
1050 RESTORE_REST 1052 RESTORE_REST
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 25eb98540a41..fd8ca53943a8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -606,7 +606,7 @@ ENTRY(_stext)
606.section ".bss.page_aligned","wa" 606.section ".bss.page_aligned","wa"
607 .align PAGE_SIZE_asm 607 .align PAGE_SIZE_asm
608#ifdef CONFIG_X86_PAE 608#ifdef CONFIG_X86_PAE
609ENTRY(swapper_pg_pmd) 609swapper_pg_pmd:
610 .fill 1024*KPMDS,4,0 610 .fill 1024*KPMDS,4,0
611#else 611#else
612ENTRY(swapper_pg_dir) 612ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb415043a929..a007454133a3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
379 /* Since I easily can, map the first 1G. 379 /* Since I easily can, map the first 1G.
380 * Don't set NX because code runs from these pages. 380 * Don't set NX because code runs from these pages.
381 */ 381 */
382 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) 382 PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
383 383
384NEXT_PAGE(level2_kernel_pgt) 384NEXT_PAGE(level2_kernel_pgt)
385 /* 40MB kernel mapping. The kernel code cannot be bigger than that. 385 /*
386 When you change this change KERNEL_TEXT_SIZE in page.h too. */ 386 * 128 MB kernel mapping. We spend a full page on this pagetable
387 /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ 387 * anyway.
388 PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE) 388 *
389 /* Module mapping starts here */ 389 * The kernel code+data+bss must not be bigger than that.
390 .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0 390 *
391 * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
392 * If you want to increase this then increase MODULES_VADDR
393 * too.)
394 */
395 PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
396 KERNEL_IMAGE_SIZE/PMD_SIZE)
391 397
392NEXT_PAGE(level2_spare_pgt) 398NEXT_PAGE(level2_spare_pgt)
393 .fill 512,8,0 399 .fill 512, 8, 0
394 400
395#undef PMDS 401#undef PMDS
396#undef NEXT_PAGE 402#undef NEXT_PAGE
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 429d084e014d..235fd6c77504 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void)
368 return 0; 368 return 0;
369} 369}
370 370
371/* 371/**
372 * Try to setup the HPET timer 372 * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
373 */ 373 */
374int __init hpet_enable(void) 374int __init hpet_enable(void)
375{ 375{
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0136d8..43f287744f9f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
730 */ 730 */
731asmlinkage 731asmlinkage
732long sys_execve(char __user *name, char __user * __user *argv, 732long sys_execve(char __user *name, char __user * __user *argv,
733 char __user * __user *envp, struct pt_regs regs) 733 char __user * __user *envp, struct pt_regs *regs)
734{ 734{
735 long error; 735 long error;
736 char * filename; 736 char * filename;
737 737
738 filename = getname(name); 738 filename = getname(name);
739 error = PTR_ERR(filename); 739 error = PTR_ERR(filename);
740 if (IS_ERR(filename)) 740 if (IS_ERR(filename))
741 return error; 741 return error;
742 error = do_execve(filename, argv, envp, &regs); 742 error = do_execve(filename, argv, envp, regs);
743 putname(filename); 743 putname(filename);
744 return error; 744 return error;
745} 745}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6fd804f07821..7637dc91c79b 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1021 1021
1022 /* Clear all flags overriden by options */ 1022 /* Clear all flags overriden by options */
1023 for (i = 0; i < NCAPINTS; i++) 1023 for (i = 0; i < NCAPINTS; i++)
1024 c->x86_capability[i] ^= cleared_cpu_caps[i]; 1024 c->x86_capability[i] &= ~cleared_cpu_caps[i];
1025 1025
1026#ifdef CONFIG_X86_MCE 1026#ifdef CONFIG_X86_MCE
1027 mcheck_init(c); 1027 mcheck_init(c);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index d53bd6fcb428..0880f2c388a9 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
554 int timeout; 554 int timeout;
555 unsigned long start_rip; 555 unsigned long start_rip;
556 struct create_idle c_idle = { 556 struct create_idle c_idle = {
557 .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
558 .cpu = cpu, 557 .cpu = cpu,
559 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 558 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
560 }; 559 };
560 INIT_WORK(&c_idle.work, do_fork_idle);
561 561
562 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 562 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
563 if (!cpu_gdt_descr[cpu].address && 563 if (!cpu_gdt_descr[cpu].address &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 02f0f61f5b11..c28c342c162f 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name)
25static void save_stack_address(void *data, unsigned long addr, int reliable) 25static void save_stack_address(void *data, unsigned long addr, int reliable)
26{ 26{
27 struct stack_trace *trace = data; 27 struct stack_trace *trace = data;
28 if (!reliable)
29 return;
28 if (trace->skip > 0) { 30 if (trace->skip > 0) {
29 trace->skip--; 31 trace->skip--;
30 return; 32 return;
@@ -37,6 +39,8 @@ static void
37save_stack_address_nosched(void *data, unsigned long addr, int reliable) 39save_stack_address_nosched(void *data, unsigned long addr, int reliable)
38{ 40{
39 struct stack_trace *trace = (struct stack_trace *)data; 41 struct stack_trace *trace = (struct stack_trace *)data;
42 if (!reliable)
43 return;
40 if (in_sched_functions(addr)) 44 if (in_sched_functions(addr))
41 return; 45 return;
42 if (trace->skip > 0) { 46 if (trace->skip > 0) {
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index 43517e324be8..f14cfd9d1f94 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
28static int __init tsc_setup(char *str) 28static int __init tsc_setup(char *str)
29{ 29{
30 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " 30 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
31 "cannot disable TSC.\n"); 31 "cannot disable TSC completely.\n");
32 mark_tsc_unstable("user disabled TSC");
32 return 1; 33 return 1;
33} 34}
34#else 35#else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3f8242774580..b6be812fac05 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,11 +44,6 @@
44 44
45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) 45#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
46#define __syscall_clobber "r11","cx","memory" 46#define __syscall_clobber "r11","cx","memory"
47#define __pa_vsymbol(x) \
48 ({unsigned long v; \
49 extern char __vsyscall_0; \
50 asm("" : "=r" (v) : "0" (x)); \
51 ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
52 47
53/* 48/*
54 * vsyscall_gtod_data contains data that is : 49 * vsyscall_gtod_data contains data that is :
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz)
102static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) 97static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
103{ 98{
104 int ret; 99 int ret;
105 asm volatile("vsysc2: syscall" 100 asm volatile("syscall"
106 : "=a" (ret) 101 : "=a" (ret)
107 : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) 102 : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
108 : __syscall_clobber ); 103 : __syscall_clobber );
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
112static __always_inline long time_syscall(long *t) 107static __always_inline long time_syscall(long *t)
113{ 108{
114 long secs; 109 long secs;
115 asm volatile("vsysc1: syscall" 110 asm volatile("syscall"
116 : "=a" (secs) 111 : "=a" (secs)
117 : "0" (__NR_time),"D" (t) : __syscall_clobber); 112 : "0" (__NR_time),"D" (t) : __syscall_clobber);
118 return secs; 113 return secs;
@@ -227,50 +222,10 @@ long __vsyscall(3) venosys_1(void)
227} 222}
228 223
229#ifdef CONFIG_SYSCTL 224#ifdef CONFIG_SYSCTL
230
231#define SYSCALL 0x050f
232#define NOP2 0x9090
233
234/*
235 * NOP out syscall in vsyscall page when not needed.
236 */
237static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
238 void __user *buffer, size_t *lenp, loff_t *ppos)
239{
240 extern u16 vsysc1, vsysc2;
241 u16 __iomem *map1;
242 u16 __iomem *map2;
243 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
244 if (!write)
245 return ret;
246 /* gcc has some trouble with __va(__pa()), so just do it this
247 way. */
248 map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
249 if (!map1)
250 return -ENOMEM;
251 map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
252 if (!map2) {
253 ret = -ENOMEM;
254 goto out;
255 }
256 if (!vsyscall_gtod_data.sysctl_enabled) {
257 writew(SYSCALL, map1);
258 writew(SYSCALL, map2);
259 } else {
260 writew(NOP2, map1);
261 writew(NOP2, map2);
262 }
263 iounmap(map2);
264out:
265 iounmap(map1);
266 return ret;
267}
268
269static ctl_table kernel_table2[] = { 225static ctl_table kernel_table2[] = {
270 { .procname = "vsyscall64", 226 { .procname = "vsyscall64",
271 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), 227 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
272 .mode = 0644, 228 .mode = 0644 },
273 .proc_handler = vsyscall_sysctl_change },
274 {} 229 {}
275}; 230};
276 231
@@ -279,7 +234,6 @@ static ctl_table kernel_root_table2[] = {
279 .child = kernel_table2 }, 234 .child = kernel_table2 },
280 {} 235 {}
281}; 236};
282
283#endif 237#endif
284 238
285/* Assume __initcall executes before all user space. Hopefully kmod 239/* Assume __initcall executes before all user space. Hopefully kmod
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4895dc..cccb38a59653 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
57#include <linux/lguest_launcher.h> 57#include <linux/lguest_launcher.h>
58#include <linux/virtio_console.h> 58#include <linux/virtio_console.h>
59#include <linux/pm.h> 59#include <linux/pm.h>
60#include <asm/lguest.h>
60#include <asm/paravirt.h> 61#include <asm/paravirt.h>
61#include <asm/param.h> 62#include <asm/param.h>
62#include <asm/page.h> 63#include <asm/page.h>
@@ -75,15 +76,6 @@
75 * behaving in simplified but equivalent ways. In particular, the Guest is the 76 * behaving in simplified but equivalent ways. In particular, the Guest is the
76 * same kernel as the Host (or at least, built from the same source code). :*/ 77 * same kernel as the Host (or at least, built from the same source code). :*/
77 78
78/* Declarations for definitions in lguest_guest.S */
79extern char lguest_noirq_start[], lguest_noirq_end[];
80extern const char lgstart_cli[], lgend_cli[];
81extern const char lgstart_sti[], lgend_sti[];
82extern const char lgstart_popf[], lgend_popf[];
83extern const char lgstart_pushf[], lgend_pushf[];
84extern const char lgstart_iret[], lgend_iret[];
85extern void lguest_iret(void);
86
87struct lguest_data lguest_data = { 79struct lguest_data lguest_data = {
88 .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, 80 .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
89 .noirq_start = (u32)lguest_noirq_start, 81 .noirq_start = (u32)lguest_noirq_start,
@@ -489,7 +481,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
489{ 481{
490 *pmdp = pmdval; 482 *pmdp = pmdval;
491 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, 483 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
492 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); 484 (__pa(pmdp)&(PAGE_SIZE-1)), 0);
493} 485}
494 486
495/* There are a couple of legacy places where the kernel sets a PTE, but we 487/* There are a couple of legacy places where the kernel sets a PTE, but we
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bb652f5a93fb..a02a14f0f324 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
172} 172}
173 173
174/* 174/*
175 * The head.S code sets up the kernel high mapping from: 175 * The head.S code sets up the kernel high mapping:
176 * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE 176 *
177 * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
177 * 178 *
178 * phys_addr holds the negative offset to the kernel, which is added 179 * phys_addr holds the negative offset to the kernel, which is added
179 * to the compile time generated pmds. This results in invalid pmds up 180 * to the compile time generated pmds. This results in invalid pmds up
@@ -515,14 +516,6 @@ void __init mem_init(void)
515 516
516 /* clear_bss() already clear the empty_zero_page */ 517 /* clear_bss() already clear the empty_zero_page */
517 518
518 /* temporary debugging - double check it's true: */
519 {
520 int i;
521
522 for (i = 0; i < 1024; i++)
523 WARN_ON_ONCE(empty_zero_page[i]);
524 }
525
526 reservedpages = 0; 519 reservedpages = 0;
527 520
528 /* this will put all low memory onto the freelists */ 521 /* this will put all low memory onto the freelists */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..14e48b5a94ba 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
44 44
45#endif 45#endif
46 46
47#ifdef CONFIG_DEBUG_PAGEALLOC
48# define debug_pagealloc 1
49#else
50# define debug_pagealloc 0
51#endif
52
47static inline int 53static inline int
48within(unsigned long addr, unsigned long start, unsigned long end) 54within(unsigned long addr, unsigned long start, unsigned long end)
49{ 55{
@@ -355,45 +361,48 @@ out_unlock:
355 361
356static LIST_HEAD(page_pool); 362static LIST_HEAD(page_pool);
357static unsigned long pool_size, pool_pages, pool_low; 363static unsigned long pool_size, pool_pages, pool_low;
358static unsigned long pool_used, pool_failed, pool_refill; 364static unsigned long pool_used, pool_failed;
359 365
360static void cpa_fill_pool(void) 366static void cpa_fill_pool(struct page **ret)
361{ 367{
362 struct page *p;
363 gfp_t gfp = GFP_KERNEL; 368 gfp_t gfp = GFP_KERNEL;
369 unsigned long flags;
370 struct page *p;
364 371
365 /* Do not allocate from interrupt context */
366 if (in_irq() || irqs_disabled())
367 return;
368 /* 372 /*
369 * Check unlocked. I does not matter when we have one more 373 * Avoid recursion (on debug-pagealloc) and also signal
370 * page in the pool. The bit lock avoids recursive pool 374 * our priority to get to these pagetables:
371 * allocations:
372 */ 375 */
373 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) 376 if (current->flags & PF_MEMALLOC)
374 return; 377 return;
378 current->flags |= PF_MEMALLOC;
375 379
376#ifdef CONFIG_DEBUG_PAGEALLOC
377 /* 380 /*
378 * We could do: 381 * Allocate atomically from atomic contexts:
379 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
380 * but this fails on !PREEMPT kernels
381 */ 382 */
382 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; 383 if (in_atomic() || irqs_disabled() || debug_pagealloc)
383#endif 384 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
384 385
385 while (pool_pages < pool_size) { 386 while (pool_pages < pool_size || (ret && !*ret)) {
386 p = alloc_pages(gfp, 0); 387 p = alloc_pages(gfp, 0);
387 if (!p) { 388 if (!p) {
388 pool_failed++; 389 pool_failed++;
389 break; 390 break;
390 } 391 }
391 spin_lock_irq(&pgd_lock); 392 /*
393 * If the call site needs a page right now, provide it:
394 */
395 if (ret && !*ret) {
396 *ret = p;
397 continue;
398 }
399 spin_lock_irqsave(&pgd_lock, flags);
392 list_add(&p->lru, &page_pool); 400 list_add(&p->lru, &page_pool);
393 pool_pages++; 401 pool_pages++;
394 spin_unlock_irq(&pgd_lock); 402 spin_unlock_irqrestore(&pgd_lock, flags);
395 } 403 }
396 clear_bit_unlock(0, &pool_refill); 404
405 current->flags &= ~PF_MEMALLOC;
397} 406}
398 407
399#define SHIFT_MB (20 - PAGE_SHIFT) 408#define SHIFT_MB (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
414 * GiB. Shift MiB to Gib and multiply the result by 423 * GiB. Shift MiB to Gib and multiply the result by
415 * POOL_PAGES_PER_GB: 424 * POOL_PAGES_PER_GB:
416 */ 425 */
417 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; 426 if (debug_pagealloc) {
418 pool_size = POOL_PAGES_PER_GB * gb; 427 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
428 pool_size = POOL_PAGES_PER_GB * gb;
429 } else {
430 pool_size = 1;
431 }
419 pool_low = pool_size; 432 pool_low = pool_size;
420 433
421 cpa_fill_pool(); 434 cpa_fill_pool(NULL);
422 printk(KERN_DEBUG 435 printk(KERN_DEBUG
423 "CPA: page pool initialized %lu of %lu pages preallocated\n", 436 "CPA: page pool initialized %lu of %lu pages preallocated\n",
424 pool_pages, pool_size); 437 pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
440 spin_lock_irqsave(&pgd_lock, flags); 453 spin_lock_irqsave(&pgd_lock, flags);
441 if (list_empty(&page_pool)) { 454 if (list_empty(&page_pool)) {
442 spin_unlock_irqrestore(&pgd_lock, flags); 455 spin_unlock_irqrestore(&pgd_lock, flags);
443 return -ENOMEM; 456 base = NULL;
457 cpa_fill_pool(&base);
458 if (!base)
459 return -ENOMEM;
460 spin_lock_irqsave(&pgd_lock, flags);
461 } else {
462 base = list_first_entry(&page_pool, struct page, lru);
463 list_del(&base->lru);
464 pool_pages--;
465
466 if (pool_pages < pool_low)
467 pool_low = pool_pages;
444 } 468 }
445 469
446 base = list_first_entry(&page_pool, struct page, lru);
447 list_del(&base->lru);
448 pool_pages--;
449
450 if (pool_pages < pool_low)
451 pool_low = pool_pages;
452
453 /* 470 /*
454 * Check for races, another CPU might have split this page 471 * Check for races, another CPU might have split this page
455 * up for us already: 472 * up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
734 cpa_flush_all(cache); 751 cpa_flush_all(cache);
735 752
736out: 753out:
737 cpa_fill_pool(); 754 cpa_fill_pool(NULL);
755
738 return ret; 756 return ret;
739} 757}
740 758
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
897 * Try to refill the page pool here. We can do this only after 915 * Try to refill the page pool here. We can do this only after
898 * the tlb flush. 916 * the tlb flush.
899 */ 917 */
900 cpa_fill_pool(); 918 cpa_fill_pool(NULL);
901} 919}
902 920
903#ifdef CONFIG_HIBERNATION 921#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index f385a4b4a484..b8bd0c4aa02e 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -48,7 +48,7 @@ obj-$(VDSO64-y) += vdso-syms.lds
48# Match symbols in the DSO that look like VDSO*; produce a file of constants. 48# Match symbols in the DSO that look like VDSO*; produce a file of constants.
49# 49#
50sed-vdsosym := -e 's/^00*/0/' \ 50sed-vdsosym := -e 's/^00*/0/' \
51 -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' 51 -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p'
52quiet_cmd_vdsosym = VDSOSYM $@ 52quiet_cmd_vdsosym = VDSOSYM $@
53 cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ 53 cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
54 54