diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-26 10:44:09 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-26 10:44:09 -0500 |
commit | 7c811e4b6af424c295e3c6438fdc9b647fe6595f (patch) | |
tree | feab985b30e44102eca093e71ccdfad4f0318087 /arch | |
parent | 37c00b84d0c1b5c4c65ae837e2235160c03e84c2 (diff) | |
parent | f18edc95a37a901ffcbe91f5e05105f916a04fae (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86
* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (24 commits)
x86: no robust/pi futex for real i386 CPUs
x86: fix boot failure on 486 due to TSC breakage
x86: fix build on non-C locales.
x86: make c_idle.work have a static address.
x86: don't save unreliable stack trace entries
x86: don't make swapper_pg_pmd global
x86: don't print a warning when MTRR are blank and running in KVM
x86: fix execve with -fstack-protect
x86: fix vsyscall wreckage
x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE
x86: fix spontaneous reboot with allyesconfig bzImage
x86: remove double-checking empty zero pages debug
x86: notsc is ignored on common configurations
x86/mtrr: fix kernel-doc missing notation
x86: handle BIOSes which terminate e820 with CF=1 and no SMAP
x86: add comments for NOPs
x86: don't use P6_NOPs if compiling with CONFIG_X86_GENERIC
x86: require family >= 6 if we are using P6 NOPs
x86: do not promote TM3x00/TM5x00 to i686-class
x86: hpet fix docbook comment
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig.cpu | 14 | ||||
-rw-r--r-- | arch/x86/boot/memory.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/transmeta.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 22 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot_64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/tsc_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 52 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 12 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 84 | ||||
-rw-r--r-- | arch/x86/vdso/Makefile | 2 |
20 files changed, 121 insertions, 138 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index e09a6b73a1aa..6d50064db182 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -377,6 +377,19 @@ config X86_OOSTORE | |||
377 | def_bool y | 377 | def_bool y |
378 | depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR | 378 | depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR |
379 | 379 | ||
380 | # | ||
381 | # P6_NOPs are a relatively minor optimization that require a family >= | ||
382 | # 6 processor, except that it is broken on certain VIA chips. | ||
383 | # Furthermore, AMD chips prefer a totally different sequence of NOPs | ||
384 | # (which work on all CPUs). As a result, disallow these if we're | ||
385 | # compiling X86_GENERIC but not X86_64 (these NOPs do work on all | ||
386 | # x86-64 capable chips); the list of processors in the right-hand clause | ||
387 | # are the cores that benefit from this optimization. | ||
388 | # | ||
389 | config X86_P6_NOP | ||
390 | def_bool y | ||
391 | depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4) | ||
392 | |||
380 | config X86_TSC | 393 | config X86_TSC |
381 | def_bool y | 394 | def_bool y |
382 | depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 | 395 | depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 |
@@ -390,6 +403,7 @@ config X86_CMOV | |||
390 | config X86_MINIMUM_CPU_FAMILY | 403 | config X86_MINIMUM_CPU_FAMILY |
391 | int | 404 | int |
392 | default "64" if X86_64 | 405 | default "64" if X86_64 |
406 | default "6" if X86_32 && X86_P6_NOP | ||
393 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) | 407 | default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) |
394 | default "3" | 408 | default "3" |
395 | 409 | ||
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 378353956b5d..e77d89f9e8aa 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -37,6 +37,12 @@ static int detect_memory_e820(void) | |||
37 | "=m" (*desc) | 37 | "=m" (*desc) |
38 | : "D" (desc), "d" (SMAP), "a" (0xe820)); | 38 | : "D" (desc), "d" (SMAP), "a" (0xe820)); |
39 | 39 | ||
40 | /* BIOSes which terminate the chain with CF = 1 as opposed | ||
41 | to %ebx = 0 don't always report the SMAP signature on | ||
42 | the final, failing, probe. */ | ||
43 | if (err) | ||
44 | break; | ||
45 | |||
40 | /* Some BIOSes stop returning SMAP in the middle of | 46 | /* Some BIOSes stop returning SMAP in the middle of |
41 | the search loop. We don't know exactly how the BIOS | 47 | the search loop. We don't know exactly how the BIOS |
42 | screwed up the map at that point, we might have a | 48 | screwed up the map at that point, we might have a |
@@ -47,9 +53,6 @@ static int detect_memory_e820(void) | |||
47 | break; | 53 | break; |
48 | } | 54 | } |
49 | 55 | ||
50 | if (err) | ||
51 | break; | ||
52 | |||
53 | count++; | 56 | count++; |
54 | desc++; | 57 | desc++; |
55 | } while (next && count < E820MAX); | 58 | } while (next && count < E820MAX); |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index a33d53017997..8ea040124f7d 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -128,13 +128,11 @@ void foo(void) | |||
128 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | 128 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); |
129 | #endif | 129 | #endif |
130 | 130 | ||
131 | #ifdef CONFIG_LGUEST_GUEST | 131 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
132 | BLANK(); | 132 | BLANK(); |
133 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 133 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
134 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | 134 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); |
135 | #endif | ||
136 | 135 | ||
137 | #ifdef CONFIG_LGUEST | ||
138 | BLANK(); | 136 | BLANK(); |
139 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); | 137 | OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); |
140 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); | 138 | OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f86a3c4a2669..a38aafaefc23 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -504,7 +504,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
504 | 504 | ||
505 | /* Clear all flags overriden by options */ | 505 | /* Clear all flags overriden by options */ |
506 | for (i = 0; i < NCAPINTS; i++) | 506 | for (i = 0; i < NCAPINTS; i++) |
507 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | 507 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
508 | 508 | ||
509 | /* Init Machine Check Exception if available. */ | 509 | /* Init Machine Check Exception if available. */ |
510 | mcheck_init(c); | 510 | mcheck_init(c); |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b6e136f23d3d..be83336fddba 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
44 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
45 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
46 | #include <asm/kvm_para.h> | ||
46 | #include "mtrr.h" | 47 | #include "mtrr.h" |
47 | 48 | ||
48 | u32 num_var_ranges = 0; | 49 | u32 num_var_ranges = 0; |
@@ -649,6 +650,7 @@ static __init int amd_special_default_mtrr(void) | |||
649 | 650 | ||
650 | /** | 651 | /** |
651 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 652 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
653 | * @end_pfn: ending page frame number | ||
652 | * | 654 | * |
653 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain | 655 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain |
654 | * memory configurations. This routine checks that the highest MTRR matches | 656 | * memory configurations. This routine checks that the highest MTRR matches |
@@ -688,8 +690,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
688 | 690 | ||
689 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 691 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
690 | if (!highest_pfn) { | 692 | if (!highest_pfn) { |
691 | printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); | 693 | if (!kvm_para_available()) { |
692 | WARN_ON(1); | 694 | printk(KERN_WARNING |
695 | "WARNING: strange, CPU MTRRs all blank?\n"); | ||
696 | WARN_ON(1); | ||
697 | } | ||
693 | return 0; | 698 | return 0; |
694 | } | 699 | } |
695 | 700 | ||
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 200fb3f9ebfb..e8b422c1c512 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -76,13 +76,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
76 | /* All Transmeta CPUs have a constant TSC */ | 76 | /* All Transmeta CPUs have a constant TSC */ |
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | 77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); |
78 | 78 | ||
79 | /* If we can run i686 user-space code, call us an i686 */ | ||
80 | #define USER686 ((1 << X86_FEATURE_TSC)|\ | ||
81 | (1 << X86_FEATURE_CX8)|\ | ||
82 | (1 << X86_FEATURE_CMOV)) | ||
83 | if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) | ||
84 | c->x86 = 6; | ||
85 | |||
86 | #ifdef CONFIG_SYSCTL | 79 | #ifdef CONFIG_SYSCTL |
87 | /* randomize_va_space slows us down enormously; | 80 | /* randomize_va_space slows us down enormously; |
88 | it probably triggers retranslation of x86->native bytecode */ | 81 | it probably triggers retranslation of x86->native bytecode */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2ad9a1bc6a73..c20c9e7e08dd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -453,6 +453,7 @@ ENTRY(stub_execve) | |||
453 | CFI_REGISTER rip, r11 | 453 | CFI_REGISTER rip, r11 |
454 | SAVE_REST | 454 | SAVE_REST |
455 | FIXUP_TOP_OF_STACK %r11 | 455 | FIXUP_TOP_OF_STACK %r11 |
456 | movq %rsp, %rcx | ||
456 | call sys_execve | 457 | call sys_execve |
457 | RESTORE_TOP_OF_STACK %r11 | 458 | RESTORE_TOP_OF_STACK %r11 |
458 | movq %rax,RAX(%rsp) | 459 | movq %rax,RAX(%rsp) |
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip) | |||
1036 | * rdi: name, rsi: argv, rdx: envp | 1037 | * rdi: name, rsi: argv, rdx: envp |
1037 | * | 1038 | * |
1038 | * We want to fallback into: | 1039 | * We want to fallback into: |
1039 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs) | 1040 | * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) |
1040 | * | 1041 | * |
1041 | * do_sys_execve asm fallback arguments: | 1042 | * do_sys_execve asm fallback arguments: |
1042 | * rdi: name, rsi: argv, rdx: envp, fake frame on the stack | 1043 | * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack |
1043 | */ | 1044 | */ |
1044 | ENTRY(kernel_execve) | 1045 | ENTRY(kernel_execve) |
1045 | CFI_STARTPROC | 1046 | CFI_STARTPROC |
1046 | FAKE_STACK_FRAME $0 | 1047 | FAKE_STACK_FRAME $0 |
1047 | SAVE_ALL | 1048 | SAVE_ALL |
1049 | movq %rsp,%rcx | ||
1048 | call sys_execve | 1050 | call sys_execve |
1049 | movq %rax, RAX(%rsp) | 1051 | movq %rax, RAX(%rsp) |
1050 | RESTORE_REST | 1052 | RESTORE_REST |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 25eb98540a41..fd8ca53943a8 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -606,7 +606,7 @@ ENTRY(_stext) | |||
606 | .section ".bss.page_aligned","wa" | 606 | .section ".bss.page_aligned","wa" |
607 | .align PAGE_SIZE_asm | 607 | .align PAGE_SIZE_asm |
608 | #ifdef CONFIG_X86_PAE | 608 | #ifdef CONFIG_X86_PAE |
609 | ENTRY(swapper_pg_pmd) | 609 | swapper_pg_pmd: |
610 | .fill 1024*KPMDS,4,0 | 610 | .fill 1024*KPMDS,4,0 |
611 | #else | 611 | #else |
612 | ENTRY(swapper_pg_dir) | 612 | ENTRY(swapper_pg_dir) |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index eb415043a929..a007454133a3 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt) | |||
379 | /* Since I easily can, map the first 1G. | 379 | /* Since I easily can, map the first 1G. |
380 | * Don't set NX because code runs from these pages. | 380 | * Don't set NX because code runs from these pages. |
381 | */ | 381 | */ |
382 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 382 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) |
383 | 383 | ||
384 | NEXT_PAGE(level2_kernel_pgt) | 384 | NEXT_PAGE(level2_kernel_pgt) |
385 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. | 385 | /* |
386 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ | 386 | * 128 MB kernel mapping. We spend a full page on this pagetable |
387 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ | 387 | * anyway. |
388 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE) | 388 | * |
389 | /* Module mapping starts here */ | 389 | * The kernel code+data+bss must not be bigger than that. |
390 | .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0 | 390 | * |
391 | * (NOTE: at +128MB starts the module area, see MODULES_VADDR. | ||
392 | * If you want to increase this then increase MODULES_VADDR | ||
393 | * too.) | ||
394 | */ | ||
395 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, | ||
396 | KERNEL_IMAGE_SIZE/PMD_SIZE) | ||
391 | 397 | ||
392 | NEXT_PAGE(level2_spare_pgt) | 398 | NEXT_PAGE(level2_spare_pgt) |
393 | .fill 512,8,0 | 399 | .fill 512, 8, 0 |
394 | 400 | ||
395 | #undef PMDS | 401 | #undef PMDS |
396 | #undef NEXT_PAGE | 402 | #undef NEXT_PAGE |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 429d084e014d..235fd6c77504 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -368,8 +368,8 @@ static int hpet_clocksource_register(void) | |||
368 | return 0; | 368 | return 0; |
369 | } | 369 | } |
370 | 370 | ||
371 | /* | 371 | /** |
372 | * Try to setup the HPET timer | 372 | * hpet_enable - Try to setup the HPET timer. Returns 1 on success. |
373 | */ | 373 | */ |
374 | int __init hpet_enable(void) | 374 | int __init hpet_enable(void) |
375 | { | 375 | { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b0cc8f0136d8..43f287744f9f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
730 | */ | 730 | */ |
731 | asmlinkage | 731 | asmlinkage |
732 | long sys_execve(char __user *name, char __user * __user *argv, | 732 | long sys_execve(char __user *name, char __user * __user *argv, |
733 | char __user * __user *envp, struct pt_regs regs) | 733 | char __user * __user *envp, struct pt_regs *regs) |
734 | { | 734 | { |
735 | long error; | 735 | long error; |
736 | char * filename; | 736 | char * filename; |
737 | 737 | ||
738 | filename = getname(name); | 738 | filename = getname(name); |
739 | error = PTR_ERR(filename); | 739 | error = PTR_ERR(filename); |
740 | if (IS_ERR(filename)) | 740 | if (IS_ERR(filename)) |
741 | return error; | 741 | return error; |
742 | error = do_execve(filename, argv, envp, ®s); | 742 | error = do_execve(filename, argv, envp, regs); |
743 | putname(filename); | 743 | putname(filename); |
744 | return error; | 744 | return error; |
745 | } | 745 | } |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6fd804f07821..7637dc91c79b 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -1021,7 +1021,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
1021 | 1021 | ||
1022 | /* Clear all flags overriden by options */ | 1022 | /* Clear all flags overriden by options */ |
1023 | for (i = 0; i < NCAPINTS; i++) | 1023 | for (i = 0; i < NCAPINTS; i++) |
1024 | c->x86_capability[i] ^= cleared_cpu_caps[i]; | 1024 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; |
1025 | 1025 | ||
1026 | #ifdef CONFIG_X86_MCE | 1026 | #ifdef CONFIG_X86_MCE |
1027 | mcheck_init(c); | 1027 | mcheck_init(c); |
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index d53bd6fcb428..0880f2c388a9 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c | |||
@@ -554,10 +554,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
554 | int timeout; | 554 | int timeout; |
555 | unsigned long start_rip; | 555 | unsigned long start_rip; |
556 | struct create_idle c_idle = { | 556 | struct create_idle c_idle = { |
557 | .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), | ||
558 | .cpu = cpu, | 557 | .cpu = cpu, |
559 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 558 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
560 | }; | 559 | }; |
560 | INIT_WORK(&c_idle.work, do_fork_idle); | ||
561 | 561 | ||
562 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | 562 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ |
563 | if (!cpu_gdt_descr[cpu].address && | 563 | if (!cpu_gdt_descr[cpu].address && |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 02f0f61f5b11..c28c342c162f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -25,6 +25,8 @@ static int save_stack_stack(void *data, char *name) | |||
25 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 25 | static void save_stack_address(void *data, unsigned long addr, int reliable) |
26 | { | 26 | { |
27 | struct stack_trace *trace = data; | 27 | struct stack_trace *trace = data; |
28 | if (!reliable) | ||
29 | return; | ||
28 | if (trace->skip > 0) { | 30 | if (trace->skip > 0) { |
29 | trace->skip--; | 31 | trace->skip--; |
30 | return; | 32 | return; |
@@ -37,6 +39,8 @@ static void | |||
37 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) | 39 | save_stack_address_nosched(void *data, unsigned long addr, int reliable) |
38 | { | 40 | { |
39 | struct stack_trace *trace = (struct stack_trace *)data; | 41 | struct stack_trace *trace = (struct stack_trace *)data; |
42 | if (!reliable) | ||
43 | return; | ||
40 | if (in_sched_functions(addr)) | 44 | if (in_sched_functions(addr)) |
41 | return; | 45 | return; |
42 | if (trace->skip > 0) { | 46 | if (trace->skip > 0) { |
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 43517e324be8..f14cfd9d1f94 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -28,7 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz); | |||
28 | static int __init tsc_setup(char *str) | 28 | static int __init tsc_setup(char *str) |
29 | { | 29 | { |
30 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | 30 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " |
31 | "cannot disable TSC.\n"); | 31 | "cannot disable TSC completely.\n"); |
32 | mark_tsc_unstable("user disabled TSC"); | ||
32 | return 1; | 33 | return 1; |
33 | } | 34 | } |
34 | #else | 35 | #else |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3f8242774580..b6be812fac05 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -44,11 +44,6 @@ | |||
44 | 44 | ||
45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
46 | #define __syscall_clobber "r11","cx","memory" | 46 | #define __syscall_clobber "r11","cx","memory" |
47 | #define __pa_vsymbol(x) \ | ||
48 | ({unsigned long v; \ | ||
49 | extern char __vsyscall_0; \ | ||
50 | asm("" : "=r" (v) : "0" (x)); \ | ||
51 | ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) | ||
52 | 47 | ||
53 | /* | 48 | /* |
54 | * vsyscall_gtod_data contains data that is : | 49 | * vsyscall_gtod_data contains data that is : |
@@ -102,7 +97,7 @@ static __always_inline void do_get_tz(struct timezone * tz) | |||
102 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 97 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
103 | { | 98 | { |
104 | int ret; | 99 | int ret; |
105 | asm volatile("vsysc2: syscall" | 100 | asm volatile("syscall" |
106 | : "=a" (ret) | 101 | : "=a" (ret) |
107 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | 102 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) |
108 | : __syscall_clobber ); | 103 | : __syscall_clobber ); |
@@ -112,7 +107,7 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |||
112 | static __always_inline long time_syscall(long *t) | 107 | static __always_inline long time_syscall(long *t) |
113 | { | 108 | { |
114 | long secs; | 109 | long secs; |
115 | asm volatile("vsysc1: syscall" | 110 | asm volatile("syscall" |
116 | : "=a" (secs) | 111 | : "=a" (secs) |
117 | : "0" (__NR_time),"D" (t) : __syscall_clobber); | 112 | : "0" (__NR_time),"D" (t) : __syscall_clobber); |
118 | return secs; | 113 | return secs; |
@@ -227,50 +222,10 @@ long __vsyscall(3) venosys_1(void) | |||
227 | } | 222 | } |
228 | 223 | ||
229 | #ifdef CONFIG_SYSCTL | 224 | #ifdef CONFIG_SYSCTL |
230 | |||
231 | #define SYSCALL 0x050f | ||
232 | #define NOP2 0x9090 | ||
233 | |||
234 | /* | ||
235 | * NOP out syscall in vsyscall page when not needed. | ||
236 | */ | ||
237 | static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | ||
238 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
239 | { | ||
240 | extern u16 vsysc1, vsysc2; | ||
241 | u16 __iomem *map1; | ||
242 | u16 __iomem *map2; | ||
243 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | ||
244 | if (!write) | ||
245 | return ret; | ||
246 | /* gcc has some trouble with __va(__pa()), so just do it this | ||
247 | way. */ | ||
248 | map1 = ioremap(__pa_vsymbol(&vsysc1), 2); | ||
249 | if (!map1) | ||
250 | return -ENOMEM; | ||
251 | map2 = ioremap(__pa_vsymbol(&vsysc2), 2); | ||
252 | if (!map2) { | ||
253 | ret = -ENOMEM; | ||
254 | goto out; | ||
255 | } | ||
256 | if (!vsyscall_gtod_data.sysctl_enabled) { | ||
257 | writew(SYSCALL, map1); | ||
258 | writew(SYSCALL, map2); | ||
259 | } else { | ||
260 | writew(NOP2, map1); | ||
261 | writew(NOP2, map2); | ||
262 | } | ||
263 | iounmap(map2); | ||
264 | out: | ||
265 | iounmap(map1); | ||
266 | return ret; | ||
267 | } | ||
268 | |||
269 | static ctl_table kernel_table2[] = { | 225 | static ctl_table kernel_table2[] = { |
270 | { .procname = "vsyscall64", | 226 | { .procname = "vsyscall64", |
271 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), | 227 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), |
272 | .mode = 0644, | 228 | .mode = 0644 }, |
273 | .proc_handler = vsyscall_sysctl_change }, | ||
274 | {} | 229 | {} |
275 | }; | 230 | }; |
276 | 231 | ||
@@ -279,7 +234,6 @@ static ctl_table kernel_root_table2[] = { | |||
279 | .child = kernel_table2 }, | 234 | .child = kernel_table2 }, |
280 | {} | 235 | {} |
281 | }; | 236 | }; |
282 | |||
283 | #endif | 237 | #endif |
284 | 238 | ||
285 | /* Assume __initcall executes before all user space. Hopefully kmod | 239 | /* Assume __initcall executes before all user space. Hopefully kmod |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5afdde4895dc..cccb38a59653 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/lguest_launcher.h> | 57 | #include <linux/lguest_launcher.h> |
58 | #include <linux/virtio_console.h> | 58 | #include <linux/virtio_console.h> |
59 | #include <linux/pm.h> | 59 | #include <linux/pm.h> |
60 | #include <asm/lguest.h> | ||
60 | #include <asm/paravirt.h> | 61 | #include <asm/paravirt.h> |
61 | #include <asm/param.h> | 62 | #include <asm/param.h> |
62 | #include <asm/page.h> | 63 | #include <asm/page.h> |
@@ -75,15 +76,6 @@ | |||
75 | * behaving in simplified but equivalent ways. In particular, the Guest is the | 76 | * behaving in simplified but equivalent ways. In particular, the Guest is the |
76 | * same kernel as the Host (or at least, built from the same source code). :*/ | 77 | * same kernel as the Host (or at least, built from the same source code). :*/ |
77 | 78 | ||
78 | /* Declarations for definitions in lguest_guest.S */ | ||
79 | extern char lguest_noirq_start[], lguest_noirq_end[]; | ||
80 | extern const char lgstart_cli[], lgend_cli[]; | ||
81 | extern const char lgstart_sti[], lgend_sti[]; | ||
82 | extern const char lgstart_popf[], lgend_popf[]; | ||
83 | extern const char lgstart_pushf[], lgend_pushf[]; | ||
84 | extern const char lgstart_iret[], lgend_iret[]; | ||
85 | extern void lguest_iret(void); | ||
86 | |||
87 | struct lguest_data lguest_data = { | 79 | struct lguest_data lguest_data = { |
88 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, | 80 | .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, |
89 | .noirq_start = (u32)lguest_noirq_start, | 81 | .noirq_start = (u32)lguest_noirq_start, |
@@ -489,7 +481,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
489 | { | 481 | { |
490 | *pmdp = pmdval; | 482 | *pmdp = pmdval; |
491 | lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, | 483 | lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, |
492 | (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); | 484 | (__pa(pmdp)&(PAGE_SIZE-1)), 0); |
493 | } | 485 | } |
494 | 486 | ||
495 | /* There are a couple of legacy places where the kernel sets a PTE, but we | 487 | /* There are a couple of legacy places where the kernel sets a PTE, but we |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb652f5a93fb..a02a14f0f324 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) | |||
172 | } | 172 | } |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * The head.S code sets up the kernel high mapping from: | 175 | * The head.S code sets up the kernel high mapping: |
176 | * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE | 176 | * |
177 | * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) | ||
177 | * | 178 | * |
178 | * phys_addr holds the negative offset to the kernel, which is added | 179 | * phys_addr holds the negative offset to the kernel, which is added |
179 | * to the compile time generated pmds. This results in invalid pmds up | 180 | * to the compile time generated pmds. This results in invalid pmds up |
@@ -515,14 +516,6 @@ void __init mem_init(void) | |||
515 | 516 | ||
516 | /* clear_bss() already clear the empty_zero_page */ | 517 | /* clear_bss() already clear the empty_zero_page */ |
517 | 518 | ||
518 | /* temporary debugging - double check it's true: */ | ||
519 | { | ||
520 | int i; | ||
521 | |||
522 | for (i = 0; i < 1024; i++) | ||
523 | WARN_ON_ONCE(empty_zero_page[i]); | ||
524 | } | ||
525 | |||
526 | reservedpages = 0; | 519 | reservedpages = 0; |
527 | 520 | ||
528 | /* this will put all low memory onto the freelists */ | 521 | /* this will put all low memory onto the freelists */ |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 464d8fc21ce6..14e48b5a94ba 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void) | |||
44 | 44 | ||
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
48 | # define debug_pagealloc 1 | ||
49 | #else | ||
50 | # define debug_pagealloc 0 | ||
51 | #endif | ||
52 | |||
47 | static inline int | 53 | static inline int |
48 | within(unsigned long addr, unsigned long start, unsigned long end) | 54 | within(unsigned long addr, unsigned long start, unsigned long end) |
49 | { | 55 | { |
@@ -355,45 +361,48 @@ out_unlock: | |||
355 | 361 | ||
356 | static LIST_HEAD(page_pool); | 362 | static LIST_HEAD(page_pool); |
357 | static unsigned long pool_size, pool_pages, pool_low; | 363 | static unsigned long pool_size, pool_pages, pool_low; |
358 | static unsigned long pool_used, pool_failed, pool_refill; | 364 | static unsigned long pool_used, pool_failed; |
359 | 365 | ||
360 | static void cpa_fill_pool(void) | 366 | static void cpa_fill_pool(struct page **ret) |
361 | { | 367 | { |
362 | struct page *p; | ||
363 | gfp_t gfp = GFP_KERNEL; | 368 | gfp_t gfp = GFP_KERNEL; |
369 | unsigned long flags; | ||
370 | struct page *p; | ||
364 | 371 | ||
365 | /* Do not allocate from interrupt context */ | ||
366 | if (in_irq() || irqs_disabled()) | ||
367 | return; | ||
368 | /* | 372 | /* |
369 | * Check unlocked. I does not matter when we have one more | 373 | * Avoid recursion (on debug-pagealloc) and also signal |
370 | * page in the pool. The bit lock avoids recursive pool | 374 | * our priority to get to these pagetables: |
371 | * allocations: | ||
372 | */ | 375 | */ |
373 | if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) | 376 | if (current->flags & PF_MEMALLOC) |
374 | return; | 377 | return; |
378 | current->flags |= PF_MEMALLOC; | ||
375 | 379 | ||
376 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
377 | /* | 380 | /* |
378 | * We could do: | 381 | * Allocate atomically from atomic contexts: |
379 | * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL; | ||
380 | * but this fails on !PREEMPT kernels | ||
381 | */ | 382 | */ |
382 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; | 383 | if (in_atomic() || irqs_disabled() || debug_pagealloc) |
383 | #endif | 384 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; |
384 | 385 | ||
385 | while (pool_pages < pool_size) { | 386 | while (pool_pages < pool_size || (ret && !*ret)) { |
386 | p = alloc_pages(gfp, 0); | 387 | p = alloc_pages(gfp, 0); |
387 | if (!p) { | 388 | if (!p) { |
388 | pool_failed++; | 389 | pool_failed++; |
389 | break; | 390 | break; |
390 | } | 391 | } |
391 | spin_lock_irq(&pgd_lock); | 392 | /* |
393 | * If the call site needs a page right now, provide it: | ||
394 | */ | ||
395 | if (ret && !*ret) { | ||
396 | *ret = p; | ||
397 | continue; | ||
398 | } | ||
399 | spin_lock_irqsave(&pgd_lock, flags); | ||
392 | list_add(&p->lru, &page_pool); | 400 | list_add(&p->lru, &page_pool); |
393 | pool_pages++; | 401 | pool_pages++; |
394 | spin_unlock_irq(&pgd_lock); | 402 | spin_unlock_irqrestore(&pgd_lock, flags); |
395 | } | 403 | } |
396 | clear_bit_unlock(0, &pool_refill); | 404 | |
405 | current->flags &= ~PF_MEMALLOC; | ||
397 | } | 406 | } |
398 | 407 | ||
399 | #define SHIFT_MB (20 - PAGE_SHIFT) | 408 | #define SHIFT_MB (20 - PAGE_SHIFT) |
@@ -414,11 +423,15 @@ void __init cpa_init(void) | |||
414 | * GiB. Shift MiB to Gib and multiply the result by | 423 | * GiB. Shift MiB to Gib and multiply the result by |
415 | * POOL_PAGES_PER_GB: | 424 | * POOL_PAGES_PER_GB: |
416 | */ | 425 | */ |
417 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; | 426 | if (debug_pagealloc) { |
418 | pool_size = POOL_PAGES_PER_GB * gb; | 427 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; |
428 | pool_size = POOL_PAGES_PER_GB * gb; | ||
429 | } else { | ||
430 | pool_size = 1; | ||
431 | } | ||
419 | pool_low = pool_size; | 432 | pool_low = pool_size; |
420 | 433 | ||
421 | cpa_fill_pool(); | 434 | cpa_fill_pool(NULL); |
422 | printk(KERN_DEBUG | 435 | printk(KERN_DEBUG |
423 | "CPA: page pool initialized %lu of %lu pages preallocated\n", | 436 | "CPA: page pool initialized %lu of %lu pages preallocated\n", |
424 | pool_pages, pool_size); | 437 | pool_pages, pool_size); |
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
440 | spin_lock_irqsave(&pgd_lock, flags); | 453 | spin_lock_irqsave(&pgd_lock, flags); |
441 | if (list_empty(&page_pool)) { | 454 | if (list_empty(&page_pool)) { |
442 | spin_unlock_irqrestore(&pgd_lock, flags); | 455 | spin_unlock_irqrestore(&pgd_lock, flags); |
443 | return -ENOMEM; | 456 | base = NULL; |
457 | cpa_fill_pool(&base); | ||
458 | if (!base) | ||
459 | return -ENOMEM; | ||
460 | spin_lock_irqsave(&pgd_lock, flags); | ||
461 | } else { | ||
462 | base = list_first_entry(&page_pool, struct page, lru); | ||
463 | list_del(&base->lru); | ||
464 | pool_pages--; | ||
465 | |||
466 | if (pool_pages < pool_low) | ||
467 | pool_low = pool_pages; | ||
444 | } | 468 | } |
445 | 469 | ||
446 | base = list_first_entry(&page_pool, struct page, lru); | ||
447 | list_del(&base->lru); | ||
448 | pool_pages--; | ||
449 | |||
450 | if (pool_pages < pool_low) | ||
451 | pool_low = pool_pages; | ||
452 | |||
453 | /* | 470 | /* |
454 | * Check for races, another CPU might have split this page | 471 | * Check for races, another CPU might have split this page |
455 | * up for us already: | 472 | * up for us already: |
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
734 | cpa_flush_all(cache); | 751 | cpa_flush_all(cache); |
735 | 752 | ||
736 | out: | 753 | out: |
737 | cpa_fill_pool(); | 754 | cpa_fill_pool(NULL); |
755 | |||
738 | return ret; | 756 | return ret; |
739 | } | 757 | } |
740 | 758 | ||
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
897 | * Try to refill the page pool here. We can do this only after | 915 | * Try to refill the page pool here. We can do this only after |
898 | * the tlb flush. | 916 | * the tlb flush. |
899 | */ | 917 | */ |
900 | cpa_fill_pool(); | 918 | cpa_fill_pool(NULL); |
901 | } | 919 | } |
902 | 920 | ||
903 | #ifdef CONFIG_HIBERNATION | 921 | #ifdef CONFIG_HIBERNATION |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index f385a4b4a484..b8bd0c4aa02e 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -48,7 +48,7 @@ obj-$(VDSO64-y) += vdso-syms.lds | |||
48 | # Match symbols in the DSO that look like VDSO*; produce a file of constants. | 48 | # Match symbols in the DSO that look like VDSO*; produce a file of constants. |
49 | # | 49 | # |
50 | sed-vdsosym := -e 's/^00*/0/' \ | 50 | sed-vdsosym := -e 's/^00*/0/' \ |
51 | -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' | 51 | -e 's/^\([[:xdigit:]]*\) . \(VDSO[[:alnum:]_]*\)$$/\2 = 0x\1;/p' |
52 | quiet_cmd_vdsosym = VDSOSYM $@ | 52 | quiet_cmd_vdsosym = VDSOSYM $@ |
53 | cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ | 53 | cmd_vdsosym = $(NM) $< | sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ |
54 | 54 | ||