author		Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-06 16:54:09 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-06 16:54:09 -0500
commit		3e6bdf473f489664dac4d7511d26c7ac3dfdc748 (patch)
tree		10cb2e928830b9de8bbc3f6dd47c18c24cd2affa
parent		3d4d4582e5b3f67a68f2cf32fd5b70d8d80f119d (diff)
parent		58d5d0d8dd52cbca988af24b5692a20b00285543 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86:
  x86: fix deadlock, make pgd_lock irq-safe
  virtio: fix trivial build bug
  x86: fix mtrr trimming
  x86: delay CPA self-test and repeat it
  x86: fix 64-bit sections
  generic: add __FINITDATA
  x86: remove spurious ifdefs from pageattr.c
  x86: mark the .rodata section also NX
  x86: fix iret exception recovery on 64-bit
  cpuidle: dubious one-bit signed bitfield in cpuidle.h
  x86: fix sparse warnings in powernow-k8.c
  x86: fix sparse error in traps_32.c
  x86: trivial sparse/checkpatch in quirks.c
  x86 ptrace: disallow null cs/ss
  MAINTAINERS: RDC R-321x SoC maintainer
  brk randomization: introduce CONFIG_COMPAT_BRK
  brk: check the lower bound properly
  x86: remove X2 workaround
  x86: make spurious fault handler aware of large mappings
  x86: make traps on entry code be debuggable in user space, 64-bit
-rw-r--r--  MAINTAINERS                                 6
-rw-r--r--  arch/x86/Kconfig.debug                      4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c   1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c            19
-rw-r--r--  arch/x86/kernel/entry_64.S                 24
-rw-r--r--  arch/x86/kernel/head_64.S                  15
-rw-r--r--  arch/x86/kernel/ptrace.c                   25
-rw-r--r--  arch/x86/kernel/quirks.c                   26
-rw-r--r--  arch/x86/kernel/test_nx.c                   2
-rw-r--r--  arch/x86/kernel/traps_32.c                 15
-rw-r--r--  arch/x86/mm/fault.c                        28
-rw-r--r--  arch/x86/mm/init_64.c                       9
-rw-r--r--  arch/x86/mm/pageattr-test.c                65
-rw-r--r--  arch/x86/mm/pageattr.c                     14
-rw-r--r--  fs/binfmt_elf.c                             2
-rw-r--r--  include/asm-x86/pgalloc_64.h               10
-rw-r--r--  include/linux/cpuidle.h                     2
-rw-r--r--  include/linux/init.h                        1
-rw-r--r--  init/Kconfig                               12
-rw-r--r--  mm/memory.c                                13
-rw-r--r--  mm/mmap.c                                   2
21 files changed, 193 insertions, 102 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c5325d2bb86d..0885aa2b095a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3224,6 +3224,12 @@ M: mporter@kernel.crashing.org
 L: linux-kernel@vger.kernel.org
 S: Maintained

+RDC R-321X SoC
+P: Florian Fainelli
+M: florian.fainelli@telecomint.eu
+L: linux-kernel@vger.kernel.org
+S: Maintained
+
 RDC R6040 FAST ETHERNET DRIVER
 P: Florian Fainelli
 M: florian.fainelli@telecomint.eu
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af28c3a..fa555148823d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS
 	This option will cause struct boot_params to be exported via debugfs.

 config CPA_DEBUG
-	bool "CPA self test code"
+	bool "CPA self-test code"
 	depends on DEBUG_KERNEL
 	help
-	  Do change_page_attr self tests at boot.
+	  Do change_page_attr() self-tests every 30 seconds.

 endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a0522735dd9d..5affe91ca1e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf

 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		u32 index;
-		u32 hi = 0, lo = 0;

 		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
 		if (index > data->max_hw_pstate) {
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1e27b69a7a0e..b6e136f23d3d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -659,7 +659,7 @@ static __init int amd_special_default_mtrr(void)
  */
 int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 {
-	unsigned long i, base, size, highest_addr = 0, def, dummy;
+	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
 	u64 trim_start, trim_size;

@@ -682,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 		mtrr_if->get(i, &base, &size, &type);
 		if (type != MTRR_TYPE_WRBACK)
 			continue;
-		base <<= PAGE_SHIFT;
-		size <<= PAGE_SHIFT;
-		if (highest_addr < base + size)
-			highest_addr = base + size;
+		if (highest_pfn < base + size)
+			highest_pfn = base + size;
 	}

 	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
-	if (!highest_addr) {
+	if (!highest_pfn) {
 		printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
 		WARN_ON(1);
 		return 0;
 	}

-	if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+	if (highest_pfn < end_pfn) {
 		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-			" all of memory, losing %LdMB of RAM.\n",
-			(((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+			" all of memory, losing %luMB of RAM.\n",
+			(end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));

 		WARN_ON(1);

 		printk(KERN_INFO "update e820 for mtrr\n");
-		trim_start = highest_addr;
+		trim_start = highest_pfn;
+		trim_start <<= PAGE_SHIFT;
 		trim_size = end_pfn;
 		trim_size <<= PAGE_SHIFT;
 		trim_size -= trim_start;
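
The reworked warning does the lost-RAM arithmetic entirely in page-frame numbers, so no u64 cast is needed. A worked example of the unit change, with invented values (not from the patch):

	/*
	 * With 4 KiB pages, PAGE_SHIFT == 12, so pfn -> MB is >> (20 - 12).
	 *
	 *   end_pfn     = 0x100000    4 GiB of RAM
	 *   highest_pfn = 0x0ff000    MTRRs cover only the first 4080 MiB
	 *
	 * (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)
	 *   = 0x1000 >> 8 = 16 MB reported lost,
	 * the same figure the old byte-based expression produced.
	 */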
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bea8474744ff..c7341e81941c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */
 	TRACE_IRQS_IRETQ
 restore_args:
 	RESTORE_ARGS 0,8,0
-iret_label:
 #ifdef CONFIG_PARAVIRT
 	INTERRUPT_RETURN
 #endif
@@ -593,13 +592,22 @@ ENTRY(native_iret)
 	.quad native_iret, bad_iret
 	.previous
 	.section .fixup,"ax"
-	/* force a signal here? this matches i386 behaviour */
-	/* running with kernel gs */
 bad_iret:
-	movq $11,%rdi	/* SIGSEGV */
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
-	jmp do_exit
+	/*
+	 * The iret traps when the %cs or %ss being restored is bogus.
+	 * We've lost the original trap vector and error code.
+	 * #GPF is the most likely one to get for an invalid selector.
+	 * So pretend we completed the iret and took the #GPF in user mode.
+	 *
+	 * We are now running with the kernel GS after exception recovery.
+	 * But error_entry expects us to have user GS to match the user %cs,
+	 * so swap back.
+	 */
+	pushq $0
+
+	SWAPGS
+	jmp general_protection
+
 	.previous

 	/* edi: workmask, edx: work */
@@ -911,7 +919,7 @@ error_kernelspace:
 	   iret run with kernel gs again, so don't set the user space flag.
 	   B stepping K8s sometimes report an truncated RIP for IRET
 	   exceptions returning to compat mode. Check for these here too. */
-	leaq iret_label(%rip),%rbp
+	leaq native_iret(%rip),%rbp
 	cmpq %rbp,RIP(%rsp)
 	je error_swapgs
 	movl %ebp,%ebp	/* zero extend */
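
For readers unfamiliar with the .fixup/__ex_table machinery used here: the ".quad native_iret, bad_iret" pair registers a recovery address for the potentially faulting iret. A conceptual sketch of the table entry (shape as in the x86 headers of this era; treat as illustrative):

	/* When a fault hits at 'insn', the trap handler looks the address
	 * up here and rewrites the saved RIP to 'fixup' instead of oopsing,
	 * which is how control reaches bad_iret above. */
	struct exception_table_entry {
		unsigned long insn;	/* faulting address (native_iret) */
		unsigned long fixup;	/* recovery code    (bad_iret)    */
	};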
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4f283ad215ec..09b38d539b09 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -250,18 +250,13 @@ ENTRY(secondary_startup_64)
 	lretq

 	/* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
-	.pushsection .init.data
-#endif
+	__CPUINITDATA
 	.align 8
-	.globl initial_code
-initial_code:
+	ENTRY(initial_code)
 	.quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
-	.popsection
-#endif
-	.globl init_rsp
-init_rsp:
+	__FINITDATA
+
+	ENTRY(init_rsp)
 	.quad init_thread_union+THREAD_SIZE-8

 bad_address:
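
The macros replace the open-coded section juggling above. Roughly, per include/linux/init.h and linkage.h of this period (a sketch, not the verbatim definitions):

	/* __CPUINITDATA	.section ".cpuinit.data","aw"
	 *			(discardable when CPU hotplug is off)
	 * __FINITDATA		.previous  -- back to the prior section
	 * ENTRY(name)		.globl name ; ALIGN ; name:
	 */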
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 96286df1bb81..702c33efea84 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task,
 	if (invalid_selector(value))
 		return -EIO;

-	if (offset != offsetof(struct user_regs_struct, gs))
+	/*
+	 * For %cs and %ss we cannot permit a null selector.
+	 * We can permit a bogus selector as long as it has USER_RPL.
+	 * Null selectors are fine for other segment registers, but
+	 * we will never get back to user mode with invalid %cs or %ss
+	 * and will take the trap in iret instead. Much code relies
+	 * on user_mode() to distinguish a user trap frame (which can
+	 * safely use invalid selectors) from a kernel trap frame.
+	 */
+	switch (offset) {
+	case offsetof(struct user_regs_struct, cs):
+	case offsetof(struct user_regs_struct, ss):
+		if (unlikely(value == 0))
+			return -EIO;
+
+	default:
 		*pt_regs_access(task_pt_regs(task), offset) = value;
-	else {
+		break;
+
+	case offsetof(struct user_regs_struct, gs):
 		task->thread.gs = value;
 		if (task == current)
 			/*
@@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task,
 	 * Can't actually change these in 64-bit mode.
 	 */
 	case offsetof(struct user_regs_struct,cs):
+		if (unlikely(value == 0))
+			return -EIO;
 #ifdef CONFIG_IA32_EMULATION
 		if (test_tsk_thread_flag(task, TIF_IA32))
 			task_pt_regs(task)->cs = value;
 #endif
 		break;
 	case offsetof(struct user_regs_struct,ss):
+		if (unlikely(value == 0))
+			return -EIO;
 #ifdef CONFIG_IA32_EMULATION
 		if (test_tsk_thread_flag(task, TIF_IA32))
 			task_pt_regs(task)->ss = value;
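
From userspace, the new check converts a null %cs/%ss poke into a plain EIO error instead of a time bomb in the child's trap frame. A hypothetical demonstration (child setup and full error handling elided; 'pid' is a traced, stopped child):

	#include <stdio.h>
	#include <stddef.h>
	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/user.h>

	static void try_null_cs(pid_t pid)
	{
		long ret = ptrace(PTRACE_POKEUSER, pid,
				  offsetof(struct user_regs_struct, cs), 0L);
		/* Before the fix: succeeds, and the child dies later in iret.
		 * After the fix: fails immediately with errno == EIO. */
		if (ret == -1)
			perror("PTRACE_POKEUSER cs=0");
	}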
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 3cd7a2dcd4fe..6ba33ca8715a 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -380,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
 void force_hpet_resume(void)
 {
 	switch (force_hpet_resume_type) {
 	case ICH_FORCE_HPET_RESUME:
-		return ich_force_hpet_resume();
-
+		ich_force_hpet_resume();
+		return;
 	case OLD_ICH_FORCE_HPET_RESUME:
-		return old_ich_force_hpet_resume();
-
+		old_ich_force_hpet_resume();
+		return;
 	case VT8237_FORCE_HPET_RESUME:
-		return vt8237_force_hpet_resume();
-
+		vt8237_force_hpet_resume();
+		return;
 	case NVIDIA_FORCE_HPET_RESUME:
-		return nvidia_force_hpet_resume();
-
+		nvidia_force_hpet_resume();
+		return;
 	default:
 		break;
 	}
 }
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 36c100c323aa..10b8a6f69f84 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -139,7 +139,6 @@ static int test_NX(void)
 	 * Until then, don't run them to avoid too many people getting scared
 	 * by the error message
 	 */
-#if 0

 #ifdef CONFIG_DEBUG_RODATA
 	/* Test 3: Check if the .rodata section is executable */
@@ -152,6 +151,7 @@ static int test_NX(void)
 	}
 #endif

+#if 0
 	/* Test 4: Check if the .data section of a module is executable */
 	if (test_address(&test_data)) {
 		printk(KERN_ERR "test_nx: .data section is executable\n");
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3cf72977d012..b22c01e05a18 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1176,17 +1176,12 @@ void __init trap_init(void)
 #endif
 	set_trap_gate(19,&simd_coprocessor_error);

+	/*
+	 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+	 * Generate a build-time error if the alignment is wrong.
+	 */
+	BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
 	if (cpu_has_fxsr) {
-		/*
-		 * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
-		 * Generates a compile-time "error: zero width for bit-field" if
-		 * the alignment is wrong.
-		 */
-		struct fxsrAlignAssert {
-			int _:!(offsetof(struct task_struct,
-					thread.i387.fxsave) & 15);
-		};
-
 		printk(KERN_INFO "Enabling fast FPU save and restore... ");
 		set_in_cr4(X86_CR4_OSFXSR);
 		printk("done.\n");
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ad8b9733d6b3..621afb6343dc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 }
 #endif

+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+
+	return 1;
+}
+
 /*
  * Handle a spurious fault caused by a stale TLB entry. This allows
  * us to lazily refresh the TLB when increasing the permissions of a
@@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address,
 	if (!pud_present(*pud))
 		return 0;

+	if (pud_large(*pud))
+		return spurious_fault_check(error_code, (pte_t *) pud);
+
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		return 0;

+	if (pmd_large(*pmd))
+		return spurious_fault_check(error_code, (pte_t *) pmd);
+
 	pte = pte_offset_kernel(pmd, address);
 	if (!pte_present(*pte))
 		return 0;

-	if ((error_code & PF_WRITE) && !pte_write(*pte))
-		return 0;
-	if ((error_code & PF_INSTR) && !pte_exec(*pte))
-		return 0;
-
-	return 1;
+	return spurious_fault_check(error_code, pte);
 }

 /*
@@ -947,11 +958,12 @@ void vmalloc_sync_all(void)
 	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
 		if (!test_bit(pgd_index(address), insync)) {
 			const pgd_t *pgd_ref = pgd_offset_k(address);
+			unsigned long flags;
 			struct page *page;

 			if (pgd_none(*pgd_ref))
 				continue;
-			spin_lock(&pgd_lock);
+			spin_lock_irqsave(&pgd_lock, flags);
 			list_for_each_entry(page, &pgd_list, lru) {
 				pgd_t *pgd;
 				pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -960,7 +972,7 @@ void vmalloc_sync_all(void)
 			else
 				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
 			}
-			spin_unlock(&pgd_lock);
+			spin_unlock_irqrestore(&pgd_lock, flags);
 			set_bit(pgd_index(address), insync);
 		}
 		if (address == start)
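
The irqsave conversion matters because pageattr.c already takes pgd_lock with interrupts disabled; mixing that with a plain spin_lock() here allows a classic same-CPU deadlock. A sketch of the hazard this closes (not code from the patch):

	/*
	 *	spin_lock(&pgd_lock);		IRQs still enabled
	 *	    <interrupt arrives>
	 *	    handler path ends up wanting pgd_lock
	 *	    spin_lock(&pgd_lock);	same CPU: deadlock
	 *
	 * spin_lock_irqsave() disables IRQs first, closing the window.
	 */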
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3a98d6f724ab..9b61c75a2355 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -591,10 +591,17 @@ void mark_rodata_ro(void)
 	if (end <= start)
 		return;

-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);

 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
+	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+	/*
+	 * The rodata section (but not the kernel text!) should also be
+	 * not-executable.
+	 */
+	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+	set_memory_nx(start, (end - start) >> PAGE_SHIFT);

 	rodata_test();

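
The round-up of 'start' before set_memory_nx() is what spares the kernel text: a page shared between the tail of .text and the head of .rodata keeps execute permission. Illustration with an invented address:

	/* With PAGE_SIZE 4096 (PAGE_MASK == ~0xfff):
	 *   __start_rodata = ...12345
	 *   start = (...12345 + 0xfff) & PAGE_MASK = ...13000
	 * so only whole pages belonging to .rodata are marked NX. */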
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 398f3a578dde..ed8201600354 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
  * and compares page tables forwards and afterwards.
  */
 #include <linux/bootmem.h>
+#include <linux/kthread.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -14,8 +15,13 @@
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>

+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
 enum {
-	NTEST			= 4000,
+	NTEST			= 400,
 #ifdef CONFIG_X86_64
 	LPS			= (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
@@ -31,7 +37,7 @@ struct split_state {
 	long min_exec, max_exec;
 };

-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
 {
 	long i, expected, missed = 0;
 	int printed = 0;
@@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s)
 			s->max_exec = addr;
 		}
 	}
-	printk(KERN_INFO
-		"CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
-		s->spg, s->lpg, s->gpg, s->exec,
-		s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+	if (print) {
+		printk(KERN_INFO
+			" 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+			s->spg, s->lpg, s->gpg, s->exec,
+			s->min_exec != ~0UL ? s->min_exec : 0,
+			s->max_exec, missed);
+	}

 	expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
 	if (expected != i) {
@@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s)
 	return err;
 }

-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];

 /* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
 {
 	struct split_state sa, sb, sc;
 	unsigned long *bm;
@@ -110,7 +119,8 @@ static __init int exercise_pageattr(void)
 	int i, k;
 	int err;

-	printk(KERN_INFO "CPA exercising pageattr\n");
+	if (print)
+		printk(KERN_INFO "CPA self-test:\n");

 	bm = vmalloc((max_pfn_mapped + 7) / 8);
 	if (!bm) {
@@ -186,7 +196,6 @@ static __init int exercise_pageattr(void)

 	failed += print_split(&sb);

-	printk(KERN_INFO "CPA reverting everything\n");
 	for (i = 0; i < NTEST; i++) {
 		if (!addr[i])
 			continue;
@@ -214,12 +223,40 @@ static __init int exercise_pageattr(void)
 	failed += print_split(&sc);

 	if (failed) {
-		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+		printk(KERN_ERR "NOT PASSED. Please report.\n");
 		WARN_ON(1);
+		return -EINVAL;
 	} else {
-		printk(KERN_INFO "CPA selftests PASSED\n");
+		if (print)
+			printk(KERN_INFO "ok.\n");
 	}

 	return 0;
 }
-module_init(exercise_pageattr);
+
+static int do_pageattr_test(void *__unused)
+{
+	while (!kthread_should_stop()) {
+		schedule_timeout_interruptible(HZ*30);
+		if (pageattr_test() < 0)
+			break;
+		if (print)
+			print--;
+	}
+	return 0;
+}
+
+static int start_pageattr_test(void)
+{
+	struct task_struct *p;
+
+	p = kthread_create(do_pageattr_test, NULL, "pageattr-test");
+	if (!IS_ERR(p))
+		wake_up_process(p);
+	else
+		WARN_ON(1);
+
+	return 0;
+}
+
+module_init(start_pageattr_test);
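
kthread_create() followed by wake_up_process() is the two-step form of starting a kernel thread; the kthread API also offers a one-step helper. An equivalent formulation of the starter, as a sketch using the real kthread_run() macro:

	/* kthread_run() combines kthread_create() and wake_up_process(). */
	static int start_pageattr_test(void)
	{
		struct task_struct *p;

		p = kthread_run(do_pageattr_test, NULL, "pageattr-test");
		WARN_ON(IS_ERR(p));
		return 0;
	}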
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 16ce841f08d6..8493c855582b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -167,8 +167,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
 		pgprot_val(forbidden) |= _PAGE_NX;

-
-#ifdef CONFIG_DEBUG_RODATA
 	/* The .rodata section needs to be read-only */
 	if (within(address, (unsigned long)__start_rodata,
 		   (unsigned long)__end_rodata))
@@ -179,7 +177,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	if (within(address, virt_to_highmap(__start_rodata),
 		   virt_to_highmap(__end_rodata)))
 		pgprot_val(forbidden) |= _PAGE_RW;
-#endif

 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

@@ -260,17 +257,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pgprot_t old_prot, new_prot;
 	int level, do_split = 1;

-	/*
-	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
-	 * largepages and changed the PSE entry from RW to RO.
-	 *
-	 * As AMD CPUs have a long series of erratas in this area,
-	 * (and none of the known ones seem to explain this hang),
-	 * disable this code until the hang can be debugged:
-	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return 1;
-
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4628c42ca892..111771d38e6e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1077,7 +1077,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack = bprm->p;

 #ifdef arch_randomize_brk
-	if (current->flags & PF_RANDOMIZE)
+	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
 		current->mm->brk = current->mm->start_brk =
 			arch_randomize_brk(current->mm);
 #endif
diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h
index 315314ce4bfb..4f6220db22b1 100644
--- a/include/asm-x86/pgalloc_64.h
+++ b/include/asm-x86/pgalloc_64.h
@@ -42,19 +42,21 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
+	unsigned long flags;

-	spin_lock(&pgd_lock);
+	spin_lock_irqsave(&pgd_lock, flags);
 	list_add(&page->lru, &pgd_list);
-	spin_unlock(&pgd_lock);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }

 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
+	unsigned long flags;

-	spin_lock(&pgd_lock);
+	spin_lock_irqsave(&pgd_lock, flags);
 	list_del(&page->lru);
-	spin_unlock(&pgd_lock);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }

 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c4e00161a247..b0fd85ab9efb 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -79,7 +79,7 @@ struct cpuidle_state_kobj {
 };

 struct cpuidle_device {
-	int			enabled:1;
+	unsigned int		enabled:1;
 	unsigned int		cpu;

 	int			last_residency;
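
The reason the one-bit signed bitfield was "dubious": a signed 1-bit field can only hold 0 and -1, so assignments of 1 read back as -1 and tests like 'dev->enabled == 1' quietly fail. A standalone illustration (not kernel code; plain 'int' bitfield signedness is implementation-defined, hence "typically"):

	#include <stdio.h>

	struct demo {
		int          s:1;	/* signed: stores 0 or -1 */
		unsigned int u:1;	/* unsigned: stores 0 or 1 */
	};

	int main(void)
	{
		struct demo d = { .s = 1, .u = 1 };
		printf("s=%d u=%u\n", d.s, d.u);	/* typically: s=-1 u=1 */
		return 0;
	}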
diff --git a/include/linux/init.h b/include/linux/init.h
index 90cdbbbbe077..a404a0055dd7 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -110,6 +110,7 @@
 #define __FINIT		.previous

 #define __INITDATA	.section ".init.data","aw"
+#define __FINITDATA	.previous

 #define __DEVINIT	.section ".devinit.text", "ax"
 #define __DEVINITDATA	.section ".devinit.data", "aw"
diff --git a/init/Kconfig b/init/Kconfig
index 87f50df58893..92b23e256614 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -541,6 +541,18 @@ config ELF_CORE
 	help
 	  Enable support for generating core dumps. Disabling saves about 4k.

+config COMPAT_BRK
+	bool "Disable heap randomization"
+	default y
+	help
+	  Randomizing heap placement makes heap exploits harder, but it
+	  also breaks ancient binaries (including anything libc5 based).
+	  This option changes the bootup default to heap randomization
+	  disabled, and can be overridden at runtime by setting
+	  /proc/sys/kernel/randomize_va_space to 2.
+
+	  On non-ancient distros (post-2000 ones) Y is usually a safe choice.
+
 config BASE_FULL
 	default y
 	bool "Enable full-sized data structures for core" if EMBEDDED
diff --git a/mm/memory.c b/mm/memory.c
index 7bb70728bb52..9d073fa0a2d0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -82,7 +82,18 @@ void * high_memory;
 EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(high_memory);

-int randomize_va_space __read_mostly = 1;
+/*
+ * Randomize the address space (stacks, mmaps, brk, etc.).
+ *
+ * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization,
+ *   as ancient (libc5 based) binaries can segfault. )
+ */
+int randomize_va_space __read_mostly =
+#ifdef CONFIG_COMPAT_BRK
+					1;
+#else
+					2;
+#endif

 static int __init disable_randmaps(char *s)
 {
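
For reference, the levels this default feeds, as documented around this release (a summary, not patch text):

	/*
	 * randomize_va_space:
	 *   0 - address-space randomization off
	 *   1 - randomize stack, mmap base, VDSO placement
	 *   2 - additionally randomize the brk (heap) start
	 *
	 * "echo 2 > /proc/sys/kernel/randomize_va_space" re-enables brk
	 * randomization even on a CONFIG_COMPAT_BRK=y kernel.
	 */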
diff --git a/mm/mmap.c b/mm/mmap.c
index bb4c963cc534..ad6e4eaf34f8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -245,7 +245,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk)

 	down_write(&mm->mmap_sem);

-	if (brk < mm->end_code)
+	if (brk < mm->start_brk)
 		goto out;

 	/*
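
With a randomized brk, start_brk sits well above end_code, so the old bound let through bogus values between the two. An illustrative layout (addresses invented):

	/*
	 *   end_code  = 0x4006a0	end of text
	 *   start_brk = 0x1e32000	above bss, randomized upward
	 *
	 * brk = 0x800000 passes the old 'brk < end_code' test but lies
	 * below start_brk; sys_brk could then "shrink" the heap into
	 * address space the process never owned.  Comparing against
	 * start_brk rejects it.
	 */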