diff options
author | Matthieu Castet <castet.matthieu@free.fr> | 2010-11-16 16:31:26 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-11-18 06:52:04 -0500 |
commit | 5bd5a452662bc37c54fb6828db1a3faf87e6511c (patch) | |
tree | bf9aa794fc6d69315b01c17f00ba619cf20191c3 | |
parent | 64edc8ed5ffae999d8d413ba006850e9e34166cb (diff) |
x86: Add NX protection for kernel data
This patch expands functionality of CONFIG_DEBUG_RODATA to set main
(static) kernel data area as NX.
The following steps are taken to achieve this:
1. Linker script is adjusted so .text always starts and ends on a page bound
2. Linker script is adjusted so .rodata always start and end on a page boundary
3. NX is set for all pages from _etext through _end in mark_rodata_ro.
4. free_init_pages() sets released memory NX in arch/x86/mm/init.c
5. bios rom is set to x when pcibios is used.
The results of patch application may be observed in the diff of kernel page
table dumps:
pcibios:
-- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400
++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400
0x00000000-0xc0000000 3G pmd
---[ Kernel Mapping ]---
-0xc0000000-0xc0100000 1M RW GLB x pte
+0xc0000000-0xc00a0000 640K RW GLB NX pte
+0xc00a0000-0xc0100000 384K RW GLB x pte
-0xc0100000-0xc03d7000 2908K ro GLB x pte
+0xc0100000-0xc0318000 2144K ro GLB x pte
+0xc0318000-0xc03d7000 764K ro GLB NX pte
-0xc03d7000-0xc0600000 2212K RW GLB x pte
+0xc03d7000-0xc0600000 2212K RW GLB NX pte
0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd
0xf7a00000-0xf7bfe000 2040K RW GLB NX pte
0xf7bfe000-0xf7c00000 8K pte
No pcibios:
-- data_nx_pt_before.txt 2009-10-13 07:48:59.000000000 -0400
++ data_nx_pt_after.txt 2009-10-13 07:26:46.000000000 -0400
0x00000000-0xc0000000 3G pmd
---[ Kernel Mapping ]---
-0xc0000000-0xc0100000 1M RW GLB x pte
+0xc0000000-0xc0100000 1M RW GLB NX pte
-0xc0100000-0xc03d7000 2908K ro GLB x pte
+0xc0100000-0xc0318000 2144K ro GLB x pte
+0xc0318000-0xc03d7000 764K ro GLB NX pte
-0xc03d7000-0xc0600000 2212K RW GLB x pte
+0xc03d7000-0xc0600000 2212K RW GLB NX pte
0xc0600000-0xf7a00000 884M RW PSE GLB NX pmd
0xf7a00000-0xf7bfe000 2040K RW GLB NX pte
0xf7bfe000-0xf7c00000 8K pte
The patch has been originally developed for Linux 2.6.34-rc2 x86 by
Siarhei Liakh <sliakh.lkml@gmail.com> and Xuxian Jiang <jiang@cs.ncsu.edu>.
-v1: initial patch for 2.6.30
-v2: patch for 2.6.31-rc7
-v3: moved all code into arch/x86, adjusted credits
-v4: fixed ifdef, removed credits from CREDITS
-v5: fixed an address calculation bug in mark_nxdata_nx()
-v6: added acked-by and PT dump diff to commit log
-v7: minor adjustments for -tip
-v8: rework with the merge of "Set first MB as RW+NX"
Signed-off-by: Siarhei Liakh <sliakh.lkml@gmail.com>
Signed-off-by: Xuxian Jiang <jiang@cs.ncsu.edu>
Signed-off-by: Matthieu CASTET <castet.matthieu@free.fr>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: James Morris <jmorris@namei.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Dave Jones <davej@redhat.com>
Cc: Kees Cook <kees.cook@canonical.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <4CE2F82E.60601@free.fr>
[ minor cleanliness edits ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/include/asm/pci.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 8 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 20 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 5 | ||||
-rw-r--r-- | arch/x86/pci/pcbios.c | 23 |
7 files changed, 57 insertions, 6 deletions
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ca0437c714b2..676129229630 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start; | |||
65 | 65 | ||
66 | #define PCIBIOS_MIN_CARDBUS_IO 0x4000 | 66 | #define PCIBIOS_MIN_CARDBUS_IO 0x4000 |
67 | 67 | ||
68 | extern int pcibios_enabled; | ||
68 | void pcibios_config_init(void); | 69 | void pcibios_config_init(void); |
69 | struct pci_bus *pcibios_scan_root(int bus); | 70 | struct pci_bus *pcibios_scan_root(int bus); |
70 | 71 | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e03530aebfd0..bf4700755184 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -69,7 +69,7 @@ jiffies_64 = jiffies; | |||
69 | 69 | ||
70 | PHDRS { | 70 | PHDRS { |
71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
72 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | user PT_LOAD FLAGS(5); /* R_E */ | 74 | user PT_LOAD FLAGS(5); /* R_E */ |
75 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
@@ -116,6 +116,10 @@ SECTIONS | |||
116 | 116 | ||
117 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
118 | 118 | ||
119 | #if defined(CONFIG_DEBUG_RODATA) | ||
120 | /* .text should occupy whole number of pages */ | ||
121 | . = ALIGN(PAGE_SIZE); | ||
122 | #endif | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | 123 | X64_ALIGN_DEBUG_RODATA_BEGIN |
120 | RO_DATA(PAGE_SIZE) | 124 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | 125 | X64_ALIGN_DEBUG_RODATA_END |
@@ -335,7 +339,7 @@ SECTIONS | |||
335 | __bss_start = .; | 339 | __bss_start = .; |
336 | *(.bss..page_aligned) | 340 | *(.bss..page_aligned) |
337 | *(.bss) | 341 | *(.bss) |
338 | . = ALIGN(4); | 342 | . = ALIGN(PAGE_SIZE); |
339 | __bss_stop = .; | 343 | __bss_stop = .; |
340 | } | 344 | } |
341 | 345 | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7d..947f42abe820 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
364 | /* | 364 | /* |
365 | * We just marked the kernel text read only above, now that | 365 | * We just marked the kernel text read only above, now that |
366 | * we are going to free part of that, we need to make that | 366 | * we are going to free part of that, we need to make that |
367 | * writeable first. | 367 | * writeable and non-executable first. |
368 | */ | 368 | */ |
369 | set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); | ||
369 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); | 370 | set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); |
370 | 371 | ||
371 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); | 372 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0e969f9f401b..f89b5bb4e93f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
226 | 226 | ||
227 | static inline int is_kernel_text(unsigned long addr) | 227 | static inline int is_kernel_text(unsigned long addr) |
228 | { | 228 | { |
229 | if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) | 229 | if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) |
230 | return 1; | 230 | return 1; |
231 | return 0; | 231 | return 0; |
232 | } | 232 | } |
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void) | |||
912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 912 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
913 | } | 913 | } |
914 | 914 | ||
915 | static void mark_nxdata_nx(void) | ||
916 | { | ||
917 | /* | ||
918 | * When this called, init has already been executed and released, | ||
919 | * so everything past _etext sould be NX. | ||
920 | */ | ||
921 | unsigned long start = PFN_ALIGN(_etext); | ||
922 | /* | ||
923 | * This comes from is_kernel_text upper limit. Also HPAGE where used: | ||
924 | */ | ||
925 | unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; | ||
926 | |||
927 | if (__supported_pte_mask & _PAGE_NX) | ||
928 | printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); | ||
929 | set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); | ||
930 | } | ||
931 | |||
915 | void mark_rodata_ro(void) | 932 | void mark_rodata_ro(void) |
916 | { | 933 | { |
917 | unsigned long start = PFN_ALIGN(_text); | 934 | unsigned long start = PFN_ALIGN(_text); |
@@ -946,6 +963,7 @@ void mark_rodata_ro(void) | |||
946 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 963 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
947 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 964 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
948 | #endif | 965 | #endif |
966 | mark_nxdata_nx(); | ||
949 | } | 967 | } |
950 | #endif | 968 | #endif |
951 | 969 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..ce59c05cae12 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -788,6 +788,7 @@ void mark_rodata_ro(void) | |||
788 | unsigned long rodata_start = | 788 | unsigned long rodata_start = |
789 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 789 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
790 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | 790 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
791 | unsigned long kernel_end = (((unsigned long)&__init_end + HPAGE_SIZE) & HPAGE_MASK); | ||
791 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | 792 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); |
792 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | 793 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); |
793 | unsigned long data_start = (unsigned long) &_sdata; | 794 | unsigned long data_start = (unsigned long) &_sdata; |
@@ -802,7 +803,7 @@ void mark_rodata_ro(void) | |||
802 | * The rodata section (but not the kernel text!) should also be | 803 | * The rodata section (but not the kernel text!) should also be |
803 | * not-executable. | 804 | * not-executable. |
804 | */ | 805 | */ |
805 | set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); | 806 | set_memory_nx(rodata_start, (kernel_end - rodata_start) >> PAGE_SHIFT); |
806 | 807 | ||
807 | rodata_test(); | 808 | rodata_test(); |
808 | 809 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6f2a6b6deb6b..8b830ca14ac4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/pfn.h> | 13 | #include <linux/pfn.h> |
14 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/pci.h> | ||
16 | 17 | ||
17 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -261,8 +262,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
261 | * The BIOS area between 640k and 1Mb needs to be executable for | 262 | * The BIOS area between 640k and 1Mb needs to be executable for |
262 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. | 263 | * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. |
263 | */ | 264 | */ |
264 | if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | 265 | #ifdef CONFIG_PCI_BIOS |
266 | if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) | ||
265 | pgprot_val(forbidden) |= _PAGE_NX; | 267 | pgprot_val(forbidden) |= _PAGE_NX; |
268 | #endif | ||
266 | 269 | ||
267 | /* | 270 | /* |
268 | * The kernel text needs to be executable for obvious reasons | 271 | * The kernel text needs to be executable for obvious reasons |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 2492d165096a..a5f7d0d63de0 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
10 | #include <asm/pci_x86.h> | 10 | #include <asm/pci_x86.h> |
11 | #include <asm/pci-functions.h> | 11 | #include <asm/pci-functions.h> |
12 | #include <asm/cacheflush.h> | ||
12 | 13 | ||
13 | /* BIOS32 signature: "_32_" */ | 14 | /* BIOS32 signature: "_32_" */ |
14 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) | 15 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) |
@@ -25,6 +26,27 @@ | |||
25 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 | 26 | #define PCIBIOS_HW_TYPE1_SPEC 0x10 |
26 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 | 27 | #define PCIBIOS_HW_TYPE2_SPEC 0x20 |
27 | 28 | ||
29 | int pcibios_enabled; | ||
30 | |||
31 | /* According to the BIOS specification at: | ||
32 | * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could | ||
33 | * restrict the x zone to some pages and make it ro. But this may be | ||
34 | * broken on some bios, complex to handle with static_protections. | ||
35 | * We could make the 0xe0000-0x100000 range rox, but this can break | ||
36 | * some ISA mapping. | ||
37 | * | ||
38 | * So we let's an rw and x hole when pcibios is used. This shouldn't | ||
39 | * happen for modern system with mmconfig, and if you don't want it | ||
40 | * you could disable pcibios... | ||
41 | */ | ||
42 | static inline void set_bios_x(void) | ||
43 | { | ||
44 | pcibios_enabled = 1; | ||
45 | set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); | ||
46 | if (__supported_pte_mask & _PAGE_NX) | ||
47 | printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n"); | ||
48 | } | ||
49 | |||
28 | /* | 50 | /* |
29 | * This is the standard structure used to identify the entry point | 51 | * This is the standard structure used to identify the entry point |
30 | * to the BIOS32 Service Directory, as documented in | 52 | * to the BIOS32 Service Directory, as documented in |
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void) | |||
332 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", | 354 | DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", |
333 | bios32_entry); | 355 | bios32_entry); |
334 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; | 356 | bios32_indirect.address = bios32_entry + PAGE_OFFSET; |
357 | set_bios_x(); | ||
335 | if (check_pcibios()) | 358 | if (check_pcibios()) |
336 | return &pci_bios_access; | 359 | return &pci_bios_access; |
337 | } | 360 | } |