diff options
Diffstat (limited to 'arch')
35 files changed, 352 insertions, 290 deletions
diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c index 62dc379d301a..813c9b63c0e1 100644 --- a/arch/alpha/mm/extable.c +++ b/arch/alpha/mm/extable.c | |||
@@ -48,6 +48,27 @@ void sort_extable(struct exception_table_entry *start, | |||
48 | cmp_ex, swap_ex); | 48 | cmp_ex, swap_ex); |
49 | } | 49 | } |
50 | 50 | ||
51 | #ifdef CONFIG_MODULES | ||
52 | /* | ||
53 | * Any entry referring to the module init will be at the beginning or | ||
54 | * the end. | ||
55 | */ | ||
56 | void trim_init_extable(struct module *m) | ||
57 | { | ||
58 | /*trim the beginning*/ | ||
59 | while (m->num_exentries && | ||
60 | within_module_init(ex_to_addr(&m->extable[0]), m)) { | ||
61 | m->extable++; | ||
62 | m->num_exentries--; | ||
63 | } | ||
64 | /*trim the end*/ | ||
65 | while (m->num_exentries && | ||
66 | within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), | ||
67 | m)) | ||
68 | m->num_exentries--; | ||
69 | } | ||
70 | #endif /* CONFIG_MODULES */ | ||
71 | |||
51 | const struct exception_table_entry * | 72 | const struct exception_table_entry * |
52 | search_extable(const struct exception_table_entry *first, | 73 | search_extable(const struct exception_table_entry *first, |
53 | const struct exception_table_entry *last, | 74 | const struct exception_table_entry *last, |
diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 1167fe9cf6c4..98f94d041d9c 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c | |||
@@ -32,8 +32,6 @@ void module_free(struct module *mod, void *module_region) | |||
32 | mod->arch.syminfo = NULL; | 32 | mod->arch.syminfo = NULL; |
33 | 33 | ||
34 | vfree(module_region); | 34 | vfree(module_region); |
35 | /* FIXME: if module_region == mod->init_region, trim exception | ||
36 | * table entries. */ | ||
37 | } | 35 | } |
38 | 36 | ||
39 | static inline int check_rela(Elf32_Rela *rela, struct module *module, | 37 | static inline int check_rela(Elf32_Rela *rela, struct module *module, |
diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c index a187833febc8..abc13e368b90 100644 --- a/arch/cris/kernel/module.c +++ b/arch/cris/kernel/module.c | |||
@@ -48,8 +48,6 @@ void *module_alloc(unsigned long size) | |||
48 | void module_free(struct module *mod, void *module_region) | 48 | void module_free(struct module *mod, void *module_region) |
49 | { | 49 | { |
50 | FREE_MODULE(module_region); | 50 | FREE_MODULE(module_region); |
51 | /* FIXME: If module_region == mod->init_region, trim exception | ||
52 | table entries. */ | ||
53 | } | 51 | } |
54 | 52 | ||
55 | /* We don't need anything special. */ | 53 | /* We don't need anything special. */ |
diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c index 850d168f69fc..711763c8a6f3 100644 --- a/arch/frv/kernel/module.c +++ b/arch/frv/kernel/module.c | |||
@@ -35,8 +35,6 @@ void *module_alloc(unsigned long size) | |||
35 | void module_free(struct module *mod, void *module_region) | 35 | void module_free(struct module *mod, void *module_region) |
36 | { | 36 | { |
37 | vfree(module_region); | 37 | vfree(module_region); |
38 | /* FIXME: If module_region == mod->init_region, trim exception | ||
39 | table entries. */ | ||
40 | } | 38 | } |
41 | 39 | ||
42 | /* We don't need anything special. */ | 40 | /* We don't need anything special. */ |
diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index cfc9127d2ced..0865e291c20d 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c | |||
@@ -23,8 +23,6 @@ void *module_alloc(unsigned long size) | |||
23 | void module_free(struct module *mod, void *module_region) | 23 | void module_free(struct module *mod, void *module_region) |
24 | { | 24 | { |
25 | vfree(module_region); | 25 | vfree(module_region); |
26 | /* FIXME: If module_region == mod->init_region, trim exception | ||
27 | table entries. */ | ||
28 | } | 26 | } |
29 | 27 | ||
30 | /* We don't need anything special. */ | 28 | /* We don't need anything special. */ |
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 71c50dd8f870..e95d5ad9285d 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c | |||
@@ -53,6 +53,32 @@ void sort_extable (struct exception_table_entry *start, | |||
53 | cmp_ex, swap_ex); | 53 | cmp_ex, swap_ex); |
54 | } | 54 | } |
55 | 55 | ||
56 | static inline unsigned long ex_to_addr(const struct exception_table_entry *x) | ||
57 | { | ||
58 | return (unsigned long)&x->insn + x->insn; | ||
59 | } | ||
60 | |||
61 | #ifdef CONFIG_MODULES | ||
62 | /* | ||
63 | * Any entry referring to the module init will be at the beginning or | ||
64 | * the end. | ||
65 | */ | ||
66 | void trim_init_extable(struct module *m) | ||
67 | { | ||
68 | /*trim the beginning*/ | ||
69 | while (m->num_exentries && | ||
70 | within_module_init(ex_to_addr(&m->extable[0]), m)) { | ||
71 | m->extable++; | ||
72 | m->num_exentries--; | ||
73 | } | ||
74 | /*trim the end*/ | ||
75 | while (m->num_exentries && | ||
76 | within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), | ||
77 | m)) | ||
78 | m->num_exentries--; | ||
79 | } | ||
80 | #endif /* CONFIG_MODULES */ | ||
81 | |||
56 | const struct exception_table_entry * | 82 | const struct exception_table_entry * |
57 | search_extable (const struct exception_table_entry *first, | 83 | search_extable (const struct exception_table_entry *first, |
58 | const struct exception_table_entry *last, | 84 | const struct exception_table_entry *last, |
diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c index 8d4205794380..cb5f37d78d49 100644 --- a/arch/m32r/kernel/module.c +++ b/arch/m32r/kernel/module.c | |||
@@ -44,8 +44,6 @@ void *module_alloc(unsigned long size) | |||
44 | void module_free(struct module *mod, void *module_region) | 44 | void module_free(struct module *mod, void *module_region) |
45 | { | 45 | { |
46 | vfree(module_region); | 46 | vfree(module_region); |
47 | /* FIXME: If module_region == mod->init_region, trim exception | ||
48 | table entries. */ | ||
49 | } | 47 | } |
50 | 48 | ||
51 | /* We don't need anything special. */ | 49 | /* We don't need anything special. */ |
diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c index 774862bc6977..cd6bcb1c957e 100644 --- a/arch/m68k/kernel/module.c +++ b/arch/m68k/kernel/module.c | |||
@@ -31,8 +31,6 @@ void *module_alloc(unsigned long size) | |||
31 | void module_free(struct module *mod, void *module_region) | 31 | void module_free(struct module *mod, void *module_region) |
32 | { | 32 | { |
33 | vfree(module_region); | 33 | vfree(module_region); |
34 | /* FIXME: If module_region == mod->init_region, trim exception | ||
35 | table entries. */ | ||
36 | } | 34 | } |
37 | 35 | ||
38 | /* We don't need anything special. */ | 36 | /* We don't need anything special. */ |
diff --git a/arch/m68knommu/kernel/module.c b/arch/m68knommu/kernel/module.c index 3b1a2ff61ddc..d11ffae7956a 100644 --- a/arch/m68knommu/kernel/module.c +++ b/arch/m68knommu/kernel/module.c | |||
@@ -23,8 +23,6 @@ void *module_alloc(unsigned long size) | |||
23 | void module_free(struct module *mod, void *module_region) | 23 | void module_free(struct module *mod, void *module_region) |
24 | { | 24 | { |
25 | vfree(module_region); | 25 | vfree(module_region); |
26 | /* FIXME: If module_region == mod->init_region, trim exception | ||
27 | table entries. */ | ||
28 | } | 26 | } |
29 | 27 | ||
30 | /* We don't need anything special. */ | 28 | /* We don't need anything special. */ |
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 1f60e27523d9..3e9100dcc12d 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c | |||
@@ -68,8 +68,6 @@ void *module_alloc(unsigned long size) | |||
68 | void module_free(struct module *mod, void *module_region) | 68 | void module_free(struct module *mod, void *module_region) |
69 | { | 69 | { |
70 | vfree(module_region); | 70 | vfree(module_region); |
71 | /* FIXME: If module_region == mod->init_region, trim exception | ||
72 | table entries. */ | ||
73 | } | 71 | } |
74 | 72 | ||
75 | int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, | 73 | int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, |
diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6b287f2e8e84..4fa0e3648d8e 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c | |||
@@ -48,8 +48,6 @@ void *module_alloc(unsigned long size) | |||
48 | void module_free(struct module *mod, void *module_region) | 48 | void module_free(struct module *mod, void *module_region) |
49 | { | 49 | { |
50 | vfree(module_region); | 50 | vfree(module_region); |
51 | /* FIXME: If module_region == mod->init_region, trim exception | ||
52 | * table entries. */ | ||
53 | } | 51 | } |
54 | 52 | ||
55 | /* | 53 | /* |
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ecd1c5024447..ef5caf2e6ed0 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c | |||
@@ -267,8 +267,6 @@ void module_free(struct module *mod, void *module_region) | |||
267 | mod->arch.section = NULL; | 267 | mod->arch.section = NULL; |
268 | 268 | ||
269 | vfree(module_region); | 269 | vfree(module_region); |
270 | /* FIXME: If module_region == mod->init_region, trim exception | ||
271 | table entries. */ | ||
272 | } | 270 | } |
273 | 271 | ||
274 | /* Additional bytes needed in front of individual sections */ | 272 | /* Additional bytes needed in front of individual sections */ |
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 43e7e3a7f130..477c663e0140 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c | |||
@@ -43,8 +43,6 @@ void *module_alloc(unsigned long size) | |||
43 | void module_free(struct module *mod, void *module_region) | 43 | void module_free(struct module *mod, void *module_region) |
44 | { | 44 | { |
45 | vfree(module_region); | 45 | vfree(module_region); |
46 | /* FIXME: If module_region == mod->init_region, trim exception | ||
47 | table entries. */ | ||
48 | } | 46 | } |
49 | 47 | ||
50 | static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, | 48 | static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, |
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index eed4a00cb676..ab2e3ed28abc 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c | |||
@@ -56,8 +56,6 @@ void *module_alloc(unsigned long size) | |||
56 | void module_free(struct module *mod, void *module_region) | 56 | void module_free(struct module *mod, void *module_region) |
57 | { | 57 | { |
58 | vfree(module_region); | 58 | vfree(module_region); |
59 | /* FIXME: If module_region == mod->init_region, trim exception | ||
60 | table entries. */ | ||
61 | } | 59 | } |
62 | 60 | ||
63 | static void | 61 | static void |
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index c19b0f7d2cc1..c2efdcde266f 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c | |||
@@ -46,8 +46,6 @@ void *module_alloc(unsigned long size) | |||
46 | void module_free(struct module *mod, void *module_region) | 46 | void module_free(struct module *mod, void *module_region) |
47 | { | 47 | { |
48 | vfree(module_region); | 48 | vfree(module_region); |
49 | /* FIXME: If module_region == mod->init_region, trim exception | ||
50 | table entries. */ | ||
51 | } | 49 | } |
52 | 50 | ||
53 | /* We don't need anything special. */ | 51 | /* We don't need anything special. */ |
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 47d5619d43fa..8303ac481034 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h | |||
@@ -17,6 +17,9 @@ | |||
17 | 17 | ||
18 | #ifndef __ASSEMBLY__ | 18 | #ifndef __ASSEMBLY__ |
19 | 19 | ||
20 | #define ARCH_HAS_SORT_EXTABLE | ||
21 | #define ARCH_HAS_SEARCH_EXTABLE | ||
22 | |||
20 | /* Sparc is not segmented, however we need to be able to fool access_ok() | 23 | /* Sparc is not segmented, however we need to be able to fool access_ok() |
21 | * when doing system calls from kernel mode legitimately. | 24 | * when doing system calls from kernel mode legitimately. |
22 | * | 25 | * |
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 90273765e81f..0ee642f63234 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c | |||
@@ -75,8 +75,6 @@ void *module_alloc(unsigned long size) | |||
75 | void module_free(struct module *mod, void *module_region) | 75 | void module_free(struct module *mod, void *module_region) |
76 | { | 76 | { |
77 | vfree(module_region); | 77 | vfree(module_region); |
78 | /* FIXME: If module_region == mod->init_region, trim exception | ||
79 | table entries. */ | ||
80 | } | 78 | } |
81 | 79 | ||
82 | /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ | 80 | /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ |
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c index 16cc28935e39..a61c349448e1 100644 --- a/arch/sparc/mm/extable.c +++ b/arch/sparc/mm/extable.c | |||
@@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start, | |||
28 | * word 3: last insn address + 4 bytes | 28 | * word 3: last insn address + 4 bytes |
29 | * word 4: fixup code address | 29 | * word 4: fixup code address |
30 | * | 30 | * |
31 | * Deleted entries are encoded as: | ||
32 | * word 1: unused | ||
33 | * word 2: -1 | ||
34 | * | ||
31 | * See asm/uaccess.h for more details. | 35 | * See asm/uaccess.h for more details. |
32 | */ | 36 | */ |
33 | 37 | ||
@@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start, | |||
39 | continue; | 43 | continue; |
40 | } | 44 | } |
41 | 45 | ||
46 | /* A deleted entry; see trim_init_extable */ | ||
47 | if (walk->fixup == -1) | ||
48 | continue; | ||
49 | |||
42 | if (walk->insn == value) | 50 | if (walk->insn == value) |
43 | return walk; | 51 | return walk; |
44 | } | 52 | } |
@@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start, | |||
57 | return NULL; | 65 | return NULL; |
58 | } | 66 | } |
59 | 67 | ||
68 | #ifdef CONFIG_MODULES | ||
69 | /* We could memmove them around; easier to mark the trimmed ones. */ | ||
70 | void trim_init_extable(struct module *m) | ||
71 | { | ||
72 | unsigned int i; | ||
73 | bool range; | ||
74 | |||
75 | for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { | ||
76 | range = m->extable[i].fixup == 0; | ||
77 | |||
78 | if (within_module_init(m->extable[i].insn, m)) { | ||
79 | m->extable[i].fixup = -1; | ||
80 | if (range) | ||
81 | m->extable[i+1].fixup = -1; | ||
82 | } | ||
83 | if (range) | ||
84 | i++; | ||
85 | } | ||
86 | } | ||
87 | #endif /* CONFIG_MODULES */ | ||
88 | |||
60 | /* Special extable search, which handles ranges. Returns fixup */ | 89 | /* Special extable search, which handles ranges. Returns fixup */ |
61 | unsigned long search_extables_range(unsigned long addr, unsigned long *g2) | 90 | unsigned long search_extables_range(unsigned long addr, unsigned long *g2) |
62 | { | 91 | { |
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 58da2480a7f4..9ce3f165111a 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h | |||
@@ -53,16 +53,21 @@ extern unsigned long end_iomem; | |||
53 | #else | 53 | #else |
54 | # define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) | 54 | # define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) |
55 | #endif | 55 | #endif |
/* The module address range is defined as the whole vmalloc range. */
#define MODULES_VADDR	VMALLOC_START
#define MODULES_END	VMALLOC_END
/* Size of that range: end minus start (the reverse would wrap around). */
#define MODULES_LEN	(MODULES_END - MODULES_VADDR)
56 | 59 | ||
57 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) | 60 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) |
58 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 61 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
59 | #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) | 62 | #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) |
60 | 63 | #define __PAGE_KERNEL_EXEC \ | |
64 | (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) | ||
61 | #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) | 65 | #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) |
62 | #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) | 66 | #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) |
63 | #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | 67 | #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) |
64 | #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) | 68 | #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) |
65 | #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) | 69 | #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) |
70 | #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) | ||
66 | 71 | ||
67 | /* | 72 | /* |
68 | * The i386 can't do page protection for execute, and considers that the same | 73 | * The i386 can't do page protection for execute, and considers that the same |
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 598b5c1903af..1b549bca4645 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile | |||
@@ -8,7 +8,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ | |||
8 | 8 | ||
9 | subarch-obj-y = lib/semaphore_32.o lib/string_32.o | 9 | subarch-obj-y = lib/semaphore_32.o lib/string_32.o |
10 | subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o | 10 | subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o |
11 | subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o | 11 | subarch-obj-$(CONFIG_MODULES) += kernel/module.o |
12 | 12 | ||
13 | USER_OBJS := bugs.o ptrace_user.o fault.o | 13 | USER_OBJS := bugs.o ptrace_user.o fault.o |
14 | 14 | ||
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index c8b4cce9cfe1..2201e9c20e4a 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile | |||
@@ -8,10 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ | |||
8 | setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ | 8 | setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ |
9 | sysrq.o ksyms.o tls.o | 9 | sysrq.o ksyms.o tls.o |
10 | 10 | ||
11 | obj-$(CONFIG_MODULES) += um_module.o | ||
12 | |||
13 | subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o | 11 | subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o |
14 | subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o | 12 | subarch-obj-$(CONFIG_MODULES) += kernel/module.o |
15 | 13 | ||
16 | ldt-y = ../sys-i386/ldt.o | 14 | ldt-y = ../sys-i386/ldt.o |
17 | 15 | ||
diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c deleted file mode 100644 index 3dead392a415..000000000000 --- a/arch/um/sys-x86_64/um_module.c +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | #include <linux/vmalloc.h> | ||
2 | #include <linux/moduleloader.h> | ||
3 | |||
4 | /* Copied from i386 arch/i386/kernel/module.c */ | ||
5 | void *module_alloc(unsigned long size) | ||
6 | { | ||
7 | if (size == 0) | ||
8 | return NULL; | ||
9 | return vmalloc_exec(size); | ||
10 | } | ||
11 | |||
12 | /* Free memory returned from module_alloc */ | ||
13 | void module_free(struct module *mod, void *module_region) | ||
14 | { | ||
15 | vfree(module_region); | ||
16 | /* | ||
17 | * FIXME: If module_region == mod->init_region, trim exception | ||
18 | * table entries. | ||
19 | */ | ||
20 | } | ||
21 | |||
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 1caf57628b9c..313389cd50d2 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h | |||
@@ -17,8 +17,13 @@ | |||
17 | /* Pages for switcher itself, then two pages per cpu */ | 17 | /* Pages for switcher itself, then two pages per cpu */ |
18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) | 18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) |
19 | 19 | ||
20 | /* We map at -4M for ease of mapping into the guest (one PTE page). */ | 20 | /* We map at -4M (-2M when PAE is activated) for ease of mapping |
21 | * into the guest (one PTE page). */ | ||
22 | #ifdef CONFIG_X86_PAE | ||
23 | #define SWITCHER_ADDR 0xFFE00000 | ||
24 | #else | ||
21 | #define SWITCHER_ADDR 0xFFC00000 | 25 | #define SWITCHER_ADDR 0xFFC00000 |
26 | #endif | ||
22 | 27 | ||
23 | /* Found in switcher.S */ | 28 | /* Found in switcher.S */ |
24 | extern unsigned long default_idt_entries[]; | 29 | extern unsigned long default_idt_entries[]; |
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index faae1996487b..d31c4a684078 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -12,11 +12,13 @@ | |||
12 | #define LHCALL_TS 8 | 12 | #define LHCALL_TS 8 |
13 | #define LHCALL_SET_CLOCKEVENT 9 | 13 | #define LHCALL_SET_CLOCKEVENT 9 |
14 | #define LHCALL_HALT 10 | 14 | #define LHCALL_HALT 10 |
15 | #define LHCALL_SET_PMD 13 | ||
15 | #define LHCALL_SET_PTE 14 | 16 | #define LHCALL_SET_PTE 14 |
16 | #define LHCALL_SET_PMD 15 | 17 | #define LHCALL_SET_PGD 15 |
17 | #define LHCALL_LOAD_TLS 16 | 18 | #define LHCALL_LOAD_TLS 16 |
18 | #define LHCALL_NOTIFY 17 | 19 | #define LHCALL_NOTIFY 17 |
19 | #define LHCALL_LOAD_GDT_ENTRY 18 | 20 | #define LHCALL_LOAD_GDT_ENTRY 18 |
21 | #define LHCALL_SEND_INTERRUPTS 19 | ||
20 | 22 | ||
21 | #define LGUEST_TRAP_ENTRY 0x1F | 23 | #define LGUEST_TRAP_ENTRY 0x1F |
22 | 24 | ||
@@ -32,10 +34,10 @@ | |||
32 | * operations? There are two ways: the direct way is to make a "hypercall", | 34 | * operations? There are two ways: the direct way is to make a "hypercall", |
33 | * to make requests of the Host Itself. | 35 | * to make requests of the Host Itself. |
34 | * | 36 | * |
35 | * We use the KVM hypercall mechanism. Eighteen hypercalls are | 37 | * We use the KVM hypercall mechanism. Seventeen hypercalls are |
36 | * available: the hypercall number is put in the %eax register, and the | 38 | * available: the hypercall number is put in the %eax register, and the |
37 | * arguments (when required) are placed in %ebx, %ecx and %edx. If a return | 39 | * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. |
38 | * value makes sense, it's returned in %eax. | 40 | * If a return value makes sense, it's returned in %eax. |
39 | * | 41 | * |
40 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful | 42 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful |
41 | * Host, rather than returning failure. This reflects Winston Churchill's | 43 | * Host, rather than returning failure. This reflects Winston Churchill's |
@@ -47,8 +49,9 @@ | |||
47 | 49 | ||
48 | #define LHCALL_RING_SIZE 64 | 50 | #define LHCALL_RING_SIZE 64 |
49 | struct hcall_args { | 51 | struct hcall_args { |
50 | /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ | 52 | /* These map directly onto eax, ebx, ecx, edx and esi |
51 | unsigned long arg0, arg1, arg2, arg3; | 53 | * in struct lguest_regs */ |
54 | unsigned long arg0, arg1, arg2, arg3, arg4; | ||
52 | }; | 55 | }; |
53 | 56 | ||
54 | #endif /* !__ASSEMBLY__ */ | 57 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 2733fad45f98..5e67c1532314 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h | |||
@@ -46,6 +46,10 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ | |||
46 | # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) | 46 | # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) |
47 | #endif | 47 | #endif |
48 | 48 | ||
/* The module address range is defined as the whole vmalloc range. */
#define MODULES_VADDR	VMALLOC_START
#define MODULES_END	VMALLOC_END
/* Size of that range: end minus start (the reverse would wrap around). */
#define MODULES_LEN	(MODULES_END - MODULES_VADDR)
52 | |||
49 | #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) | 53 | #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) |
50 | 54 | ||
51 | #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ | 55 | #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4f78bd682125..f3477bb84566 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | |||
73 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 73 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
74 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 74 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
75 | obj-$(CONFIG_KPROBES) += kprobes.o | 75 | obj-$(CONFIG_KPROBES) += kprobes.o |
76 | obj-$(CONFIG_MODULES) += module_$(BITS).o | 76 | obj-$(CONFIG_MODULES) += module.o |
77 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o | 77 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o |
78 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | 78 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o |
79 | obj-$(CONFIG_KGDB) += kgdb.o | 79 | obj-$(CONFIG_KGDB) += kgdb.o |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a830cbd7015..dfdbf6403895 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -126,6 +126,7 @@ void foo(void) | |||
126 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 126 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
127 | BLANK(); | 127 | BLANK(); |
128 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 128 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
129 | OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); | ||
129 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | 130 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); |
130 | 131 | ||
131 | BLANK(); | 132 | BLANK(); |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module.c index c23880b90b5c..89f386f044e4 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module.c | |||
@@ -1,6 +1,5 @@ | |||
1 | /* Kernel module help for x86-64 | 1 | /* Kernel module help for x86. |
2 | Copyright (C) 2001 Rusty Russell. | 2 | Copyright (C) 2001 Rusty Russell. |
3 | Copyright (C) 2002,2003 Andi Kleen, SuSE Labs. | ||
4 | 3 | ||
5 | This program is free software; you can redistribute it and/or modify | 4 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 5 | it under the terms of the GNU General Public License as published by |
@@ -22,23 +21,18 @@ | |||
22 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
23 | #include <linux/string.h> | 22 | #include <linux/string.h> |
24 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
25 | #include <linux/mm.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | ||
28 | 26 | ||
29 | #include <asm/system.h> | 27 | #include <asm/system.h> |
30 | #include <asm/page.h> | 28 | #include <asm/page.h> |
31 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
32 | 30 | ||
31 | #if 0 | ||
32 | #define DEBUGP printk | ||
33 | #else | ||
33 | #define DEBUGP(fmt...) | 34 | #define DEBUGP(fmt...) |
34 | 35 | #endif | |
35 | #ifndef CONFIG_UML | ||
36 | void module_free(struct module *mod, void *module_region) | ||
37 | { | ||
38 | vfree(module_region); | ||
39 | /* FIXME: If module_region == mod->init_region, trim exception | ||
40 | table entries. */ | ||
41 | } | ||
42 | 36 | ||
43 | void *module_alloc(unsigned long size) | 37 | void *module_alloc(unsigned long size) |
44 | { | 38 | { |
@@ -54,9 +48,15 @@ void *module_alloc(unsigned long size) | |||
54 | if (!area) | 48 | if (!area) |
55 | return NULL; | 49 | return NULL; |
56 | 50 | ||
57 | return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); | 51 | return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, |
52 | PAGE_KERNEL_EXEC); | ||
53 | } | ||
54 | |||
55 | /* Free memory returned from module_alloc */ | ||
56 | void module_free(struct module *mod, void *module_region) | ||
57 | { | ||
58 | vfree(module_region); | ||
58 | } | 59 | } |
59 | #endif | ||
60 | 60 | ||
61 | /* We don't need anything special. */ | 61 | /* We don't need anything special. */ |
62 | int module_frob_arch_sections(Elf_Ehdr *hdr, | 62 | int module_frob_arch_sections(Elf_Ehdr *hdr, |
@@ -67,6 +67,58 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, | |||
67 | return 0; | 67 | return 0; |
68 | } | 68 | } |
69 | 69 | ||
70 | #ifdef CONFIG_X86_32 | ||
71 | int apply_relocate(Elf32_Shdr *sechdrs, | ||
72 | const char *strtab, | ||
73 | unsigned int symindex, | ||
74 | unsigned int relsec, | ||
75 | struct module *me) | ||
76 | { | ||
77 | unsigned int i; | ||
78 | Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; | ||
79 | Elf32_Sym *sym; | ||
80 | uint32_t *location; | ||
81 | |||
82 | DEBUGP("Applying relocate section %u to %u\n", relsec, | ||
83 | sechdrs[relsec].sh_info); | ||
84 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | ||
85 | /* This is where to make the change */ | ||
86 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
87 | + rel[i].r_offset; | ||
88 | /* This is the symbol it is referring to. Note that all | ||
89 | undefined symbols have been resolved. */ | ||
90 | sym = (Elf32_Sym *)sechdrs[symindex].sh_addr | ||
91 | + ELF32_R_SYM(rel[i].r_info); | ||
92 | |||
93 | switch (ELF32_R_TYPE(rel[i].r_info)) { | ||
94 | case R_386_32: | ||
95 | /* We add the value into the location given */ | ||
96 | *location += sym->st_value; | ||
97 | break; | ||
98 | case R_386_PC32: | ||
99 | /* Add the value, subtract its postition */ | ||
100 | *location += sym->st_value - (uint32_t)location; | ||
101 | break; | ||
102 | default: | ||
103 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | ||
104 | me->name, ELF32_R_TYPE(rel[i].r_info)); | ||
105 | return -ENOEXEC; | ||
106 | } | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
112 | const char *strtab, | ||
113 | unsigned int symindex, | ||
114 | unsigned int relsec, | ||
115 | struct module *me) | ||
116 | { | ||
117 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
118 | me->name); | ||
119 | return -ENOEXEC; | ||
120 | } | ||
121 | #else /*X86_64*/ | ||
70 | int apply_relocate_add(Elf64_Shdr *sechdrs, | 122 | int apply_relocate_add(Elf64_Shdr *sechdrs, |
71 | const char *strtab, | 123 | const char *strtab, |
72 | unsigned int symindex, | 124 | unsigned int symindex, |
@@ -147,6 +199,8 @@ int apply_relocate(Elf_Shdr *sechdrs, | |||
147 | return -ENOSYS; | 199 | return -ENOSYS; |
148 | } | 200 | } |
149 | 201 | ||
202 | #endif | ||
203 | |||
150 | int module_finalize(const Elf_Ehdr *hdr, | 204 | int module_finalize(const Elf_Ehdr *hdr, |
151 | const Elf_Shdr *sechdrs, | 205 | const Elf_Shdr *sechdrs, |
152 | struct module *me) | 206 | struct module *me) |
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c deleted file mode 100644 index 0edd819050e7..000000000000 --- a/arch/x86/kernel/module_32.c +++ /dev/null | |||
@@ -1,152 +0,0 @@ | |||
1 | /* Kernel module help for i386. | ||
2 | Copyright (C) 2001 Rusty Russell. | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 2 of the License, or | ||
7 | (at your option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with this program; if not, write to the Free Software | ||
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #include <linux/moduleloader.h> | ||
19 | #include <linux/elf.h> | ||
20 | #include <linux/vmalloc.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/bug.h> | ||
25 | |||
26 | #if 0 | ||
27 | #define DEBUGP printk | ||
28 | #else | ||
29 | #define DEBUGP(fmt...) | ||
30 | #endif | ||
31 | |||
32 | void *module_alloc(unsigned long size) | ||
33 | { | ||
34 | if (size == 0) | ||
35 | return NULL; | ||
36 | return vmalloc_exec(size); | ||
37 | } | ||
38 | |||
39 | |||
40 | /* Free memory returned from module_alloc */ | ||
41 | void module_free(struct module *mod, void *module_region) | ||
42 | { | ||
43 | vfree(module_region); | ||
44 | /* FIXME: If module_region == mod->init_region, trim exception | ||
45 | table entries. */ | ||
46 | } | ||
47 | |||
48 | /* We don't need anything special. */ | ||
49 | int module_frob_arch_sections(Elf_Ehdr *hdr, | ||
50 | Elf_Shdr *sechdrs, | ||
51 | char *secstrings, | ||
52 | struct module *mod) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | int apply_relocate(Elf32_Shdr *sechdrs, | ||
58 | const char *strtab, | ||
59 | unsigned int symindex, | ||
60 | unsigned int relsec, | ||
61 | struct module *me) | ||
62 | { | ||
63 | unsigned int i; | ||
64 | Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; | ||
65 | Elf32_Sym *sym; | ||
66 | uint32_t *location; | ||
67 | |||
68 | DEBUGP("Applying relocate section %u to %u\n", relsec, | ||
69 | sechdrs[relsec].sh_info); | ||
70 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | ||
71 | /* This is where to make the change */ | ||
72 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
73 | + rel[i].r_offset; | ||
74 | /* This is the symbol it is referring to. Note that all | ||
75 | undefined symbols have been resolved. */ | ||
76 | sym = (Elf32_Sym *)sechdrs[symindex].sh_addr | ||
77 | + ELF32_R_SYM(rel[i].r_info); | ||
78 | |||
79 | switch (ELF32_R_TYPE(rel[i].r_info)) { | ||
80 | case R_386_32: | ||
81 | /* We add the value into the location given */ | ||
82 | *location += sym->st_value; | ||
83 | break; | ||
84 | case R_386_PC32: | ||
85 | /* Add the value, subtract its position */ | ||
86 | *location += sym->st_value - (uint32_t)location; | ||
87 | break; | ||
88 | default: | ||
89 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | ||
90 | me->name, ELF32_R_TYPE(rel[i].r_info)); | ||
91 | return -ENOEXEC; | ||
92 | } | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
98 | const char *strtab, | ||
99 | unsigned int symindex, | ||
100 | unsigned int relsec, | ||
101 | struct module *me) | ||
102 | { | ||
103 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
104 | me->name); | ||
105 | return -ENOEXEC; | ||
106 | } | ||
107 | |||
108 | int module_finalize(const Elf_Ehdr *hdr, | ||
109 | const Elf_Shdr *sechdrs, | ||
110 | struct module *me) | ||
111 | { | ||
112 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, | ||
113 | *para = NULL; | ||
114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | ||
115 | |||
116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | ||
117 | if (!strcmp(".text", secstrings + s->sh_name)) | ||
118 | text = s; | ||
119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | ||
120 | alt = s; | ||
121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | ||
122 | locks = s; | ||
123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | ||
124 | para = s; | ||
125 | } | ||
126 | |||
127 | if (alt) { | ||
128 | /* patch .altinstructions */ | ||
129 | void *aseg = (void *)alt->sh_addr; | ||
130 | apply_alternatives(aseg, aseg + alt->sh_size); | ||
131 | } | ||
132 | if (locks && text) { | ||
133 | void *lseg = (void *)locks->sh_addr; | ||
134 | void *tseg = (void *)text->sh_addr; | ||
135 | alternatives_smp_module_add(me, me->name, | ||
136 | lseg, lseg + locks->sh_size, | ||
137 | tseg, tseg + text->sh_size); | ||
138 | } | ||
139 | |||
140 | if (para) { | ||
141 | void *pseg = (void *)para->sh_addr; | ||
142 | apply_paravirt(pseg, pseg + para->sh_size); | ||
143 | } | ||
144 | |||
145 | return module_bug_finalize(hdr, sechdrs, me); | ||
146 | } | ||
147 | |||
148 | void module_arch_cleanup(struct module *mod) | ||
149 | { | ||
150 | alternatives_smp_module_del(mod); | ||
151 | module_bug_cleanup(mod); | ||
152 | } | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1c636bf31a7..be5ae80f897f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -301,15 +301,13 @@ static void __init reserve_brk(void) | |||
301 | 301 | ||
302 | #ifdef CONFIG_BLK_DEV_INITRD | 302 | #ifdef CONFIG_BLK_DEV_INITRD |
303 | 303 | ||
304 | #ifdef CONFIG_X86_32 | ||
305 | |||
306 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 304 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
307 | static void __init relocate_initrd(void) | 305 | static void __init relocate_initrd(void) |
308 | { | 306 | { |
309 | 307 | ||
310 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 308 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
311 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 309 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
312 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 310 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
313 | u64 ramdisk_here; | 311 | u64 ramdisk_here; |
314 | unsigned long slop, clen, mapaddr; | 312 | unsigned long slop, clen, mapaddr; |
315 | char *p, *q; | 313 | char *p, *q; |
@@ -365,14 +363,13 @@ static void __init relocate_initrd(void) | |||
365 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 363 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
366 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 364 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
367 | } | 365 | } |
368 | #endif | ||
369 | 366 | ||
370 | static void __init reserve_initrd(void) | 367 | static void __init reserve_initrd(void) |
371 | { | 368 | { |
372 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 369 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
373 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 370 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
374 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 371 | u64 ramdisk_end = ramdisk_image + ramdisk_size; |
375 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 372 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
376 | 373 | ||
377 | if (!boot_params.hdr.type_of_loader || | 374 | if (!boot_params.hdr.type_of_loader || |
378 | !ramdisk_image || !ramdisk_size) | 375 | !ramdisk_image || !ramdisk_size) |
@@ -402,14 +399,8 @@ static void __init reserve_initrd(void) | |||
402 | return; | 399 | return; |
403 | } | 400 | } |
404 | 401 | ||
405 | #ifdef CONFIG_X86_32 | ||
406 | relocate_initrd(); | 402 | relocate_initrd(); |
407 | #else | 403 | |
408 | printk(KERN_ERR "initrd extends beyond end of memory " | ||
409 | "(0x%08llx > 0x%08llx)\ndisabling initrd\n", | ||
410 | ramdisk_end, end_of_lowmem); | ||
411 | initrd_start = 0; | ||
412 | #endif | ||
413 | free_early(ramdisk_image, ramdisk_end); | 404 | free_early(ramdisk_image, ramdisk_end); |
414 | } | 405 | } |
415 | #else | 406 | #else |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c85b2e2bb65..367e87882041 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -108,6 +108,8 @@ SECTIONS | |||
108 | /* Data */ | 108 | /* Data */ |
109 | . = ALIGN(PAGE_SIZE); | 109 | . = ALIGN(PAGE_SIZE); |
110 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 110 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
111 | /* Start of data section */ | ||
112 | _sdata = .; | ||
111 | DATA_DATA | 113 | DATA_DATA |
112 | CONSTRUCTORS | 114 | CONSTRUCTORS |
113 | 115 | ||
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 8dab8f7844d3..38718041efc3 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig | |||
@@ -2,7 +2,6 @@ config LGUEST_GUEST | |||
2 | bool "Lguest guest support" | 2 | bool "Lguest guest support" |
3 | select PARAVIRT | 3 | select PARAVIRT |
4 | depends on X86_32 | 4 | depends on X86_32 |
5 | depends on !X86_PAE | ||
6 | select VIRTIO | 5 | select VIRTIO |
7 | select VIRTIO_RING | 6 | select VIRTIO_RING |
8 | select VIRTIO_CONSOLE | 7 | select VIRTIO_CONSOLE |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4e0c26559395..7bc65f0f62c4 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -87,7 +87,7 @@ struct lguest_data lguest_data = { | |||
87 | 87 | ||
88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a |
89 | * ring buffer of stored hypercalls which the Host will run through next time we | 89 | * ring buffer of stored hypercalls which the Host will run through next time we |
90 | * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall | 90 | * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall |
91 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, | 91 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, |
92 | * and 255 once the Host has finished with it. | 92 | * and 255 once the Host has finished with it. |
93 | * | 93 | * |
@@ -96,7 +96,8 @@ struct lguest_data lguest_data = { | |||
96 | * effect of causing the Host to run all the stored calls in the ring buffer | 96 | * effect of causing the Host to run all the stored calls in the ring buffer |
97 | * which empties it for next time! */ | 97 | * which empties it for next time! */ |
98 | static void async_hcall(unsigned long call, unsigned long arg1, | 98 | static void async_hcall(unsigned long call, unsigned long arg1, |
99 | unsigned long arg2, unsigned long arg3) | 99 | unsigned long arg2, unsigned long arg3, |
100 | unsigned long arg4) | ||
100 | { | 101 | { |
101 | /* Note: This code assumes we're uniprocessor. */ | 102 | /* Note: This code assumes we're uniprocessor. */ |
102 | static unsigned int next_call; | 103 | static unsigned int next_call; |
@@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
108 | local_irq_save(flags); | 109 | local_irq_save(flags); |
109 | if (lguest_data.hcall_status[next_call] != 0xFF) { | 110 | if (lguest_data.hcall_status[next_call] != 0xFF) { |
110 | /* Table full, so do normal hcall which will flush table. */ | 111 | /* Table full, so do normal hcall which will flush table. */ |
111 | kvm_hypercall3(call, arg1, arg2, arg3); | 112 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); |
112 | } else { | 113 | } else { |
113 | lguest_data.hcalls[next_call].arg0 = call; | 114 | lguest_data.hcalls[next_call].arg0 = call; |
114 | lguest_data.hcalls[next_call].arg1 = arg1; | 115 | lguest_data.hcalls[next_call].arg1 = arg1; |
115 | lguest_data.hcalls[next_call].arg2 = arg2; | 116 | lguest_data.hcalls[next_call].arg2 = arg2; |
116 | lguest_data.hcalls[next_call].arg3 = arg3; | 117 | lguest_data.hcalls[next_call].arg3 = arg3; |
118 | lguest_data.hcalls[next_call].arg4 = arg4; | ||
117 | /* Arguments must all be written before we mark it to go */ | 119 | /* Arguments must all be written before we mark it to go */ |
118 | wmb(); | 120 | wmb(); |
119 | lguest_data.hcall_status[next_call] = 0; | 121 | lguest_data.hcall_status[next_call] = 0; |
@@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call, | |||
141 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 143 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
142 | kvm_hypercall1(call, arg1); | 144 | kvm_hypercall1(call, arg1); |
143 | else | 145 | else |
144 | async_hcall(call, arg1, 0, 0); | 146 | async_hcall(call, arg1, 0, 0, 0); |
145 | } | 147 | } |
146 | 148 | ||
147 | static void lazy_hcall2(unsigned long call, | 149 | static void lazy_hcall2(unsigned long call, |
@@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call, | |||
151 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 153 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
152 | kvm_hypercall2(call, arg1, arg2); | 154 | kvm_hypercall2(call, arg1, arg2); |
153 | else | 155 | else |
154 | async_hcall(call, arg1, arg2, 0); | 156 | async_hcall(call, arg1, arg2, 0, 0); |
155 | } | 157 | } |
156 | 158 | ||
157 | static void lazy_hcall3(unsigned long call, | 159 | static void lazy_hcall3(unsigned long call, |
@@ -162,9 +164,23 @@ static void lazy_hcall3(unsigned long call, | |||
162 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 164 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
163 | kvm_hypercall3(call, arg1, arg2, arg3); | 165 | kvm_hypercall3(call, arg1, arg2, arg3); |
164 | else | 166 | else |
165 | async_hcall(call, arg1, arg2, arg3); | 167 | async_hcall(call, arg1, arg2, arg3, 0); |
166 | } | 168 | } |
167 | 169 | ||
170 | #ifdef CONFIG_X86_PAE | ||
171 | static void lazy_hcall4(unsigned long call, | ||
172 | unsigned long arg1, | ||
173 | unsigned long arg2, | ||
174 | unsigned long arg3, | ||
175 | unsigned long arg4) | ||
176 | { | ||
177 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | ||
178 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); | ||
179 | else | ||
180 | async_hcall(call, arg1, arg2, arg3, arg4); | ||
181 | } | ||
182 | #endif | ||
183 | |||
168 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 184 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then |
169 | * issue the do-nothing hypercall to flush any stored calls. */ | 185 | * issue the do-nothing hypercall to flush any stored calls. */ |
170 | static void lguest_leave_lazy_mmu_mode(void) | 186 | static void lguest_leave_lazy_mmu_mode(void) |
@@ -179,7 +195,7 @@ static void lguest_end_context_switch(struct task_struct *next) | |||
179 | paravirt_end_context_switch(next); | 195 | paravirt_end_context_switch(next); |
180 | } | 196 | } |
181 | 197 | ||
182 | /*G:033 | 198 | /*G:032 |
183 | * After that diversion we return to our first native-instruction | 199 | * After that diversion we return to our first native-instruction |
184 | * replacements: four functions for interrupt control. | 200 | * replacements: four functions for interrupt control. |
185 | * | 201 | * |
@@ -199,30 +215,28 @@ static unsigned long save_fl(void) | |||
199 | { | 215 | { |
200 | return lguest_data.irq_enabled; | 216 | return lguest_data.irq_enabled; |
201 | } | 217 | } |
202 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | ||
203 | |||
204 | /* restore_flags() just sets the flags back to the value given. */ | ||
205 | static void restore_fl(unsigned long flags) | ||
206 | { | ||
207 | lguest_data.irq_enabled = flags; | ||
208 | } | ||
209 | PV_CALLEE_SAVE_REGS_THUNK(restore_fl); | ||
210 | 218 | ||
211 | /* Interrupts go off... */ | 219 | /* Interrupts go off... */ |
212 | static void irq_disable(void) | 220 | static void irq_disable(void) |
213 | { | 221 | { |
214 | lguest_data.irq_enabled = 0; | 222 | lguest_data.irq_enabled = 0; |
215 | } | 223 | } |
224 | |||
225 | /* Let's pause a moment. Remember how I said these are called so often? | ||
226 | * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to | ||
227 | * break some rules. In particular, these functions are assumed to save their | ||
228 | * own registers if they need to: normal C functions assume they can trash the | ||
229 | * eax register. To use normal C functions, we use | ||
230 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the | ||
231 | * C function, then restores it. */ | ||
232 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | ||
216 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | 233 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); |
234 | /*:*/ | ||
217 | 235 | ||
218 | /* Interrupts go on... */ | 236 | /* These are in i386_head.S */ |
219 | static void irq_enable(void) | 237 | extern void lg_irq_enable(void); |
220 | { | 238 | extern void lg_restore_fl(unsigned long flags); |
221 | lguest_data.irq_enabled = X86_EFLAGS_IF; | ||
222 | } | ||
223 | PV_CALLEE_SAVE_REGS_THUNK(irq_enable); | ||
224 | 239 | ||
225 | /*:*/ | ||
226 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable | 240 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable |
227 | * them (or when we unmask an interrupt). This seems to work for the moment, | 241 | * them (or when we unmask an interrupt). This seems to work for the moment, |
228 | * since interrupts are rare and we'll just get the interrupt on the next timer | 242 | * since interrupts are rare and we'll just get the interrupt on the next timer |
@@ -368,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
368 | case 1: /* Basic feature request. */ | 382 | case 1: /* Basic feature request. */ |
369 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 383 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
370 | *cx &= 0x00002201; | 384 | *cx &= 0x00002201; |
371 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ | 385 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ |
372 | *dx &= 0x07808111; | 386 | *dx &= 0x07808151; |
373 | /* The Host can do a nice optimization if it knows that the | 387 | /* The Host can do a nice optimization if it knows that the |
374 | * kernel mappings (addresses above 0xC0000000 or whatever | 388 | * kernel mappings (addresses above 0xC0000000 or whatever |
375 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 389 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
@@ -388,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
388 | if (*ax > 0x80000008) | 402 | if (*ax > 0x80000008) |
389 | *ax = 0x80000008; | 403 | *ax = 0x80000008; |
390 | break; | 404 | break; |
405 | case 0x80000001: | ||
406 | /* Here we should fix nx cap depending on host. */ | ||
407 | /* For this version of PAE, we just clear NX bit. */ | ||
408 | *dx &= ~(1 << 20); | ||
409 | break; | ||
391 | } | 410 | } |
392 | } | 411 | } |
393 | 412 | ||
@@ -521,25 +540,52 @@ static void lguest_write_cr4(unsigned long val) | |||
521 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, | 540 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, |
522 | pte_t *ptep) | 541 | pte_t *ptep) |
523 | { | 542 | { |
543 | #ifdef CONFIG_X86_PAE | ||
544 | lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, | ||
545 | ptep->pte_low, ptep->pte_high); | ||
546 | #else | ||
524 | lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); | 547 | lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); |
548 | #endif | ||
525 | } | 549 | } |
526 | 550 | ||
527 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, | 551 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, |
528 | pte_t *ptep, pte_t pteval) | 552 | pte_t *ptep, pte_t pteval) |
529 | { | 553 | { |
530 | *ptep = pteval; | 554 | native_set_pte(ptep, pteval); |
531 | lguest_pte_update(mm, addr, ptep); | 555 | lguest_pte_update(mm, addr, ptep); |
532 | } | 556 | } |
533 | 557 | ||
534 | /* The Guest calls this to set a top-level entry. Again, we set the entry then | 558 | /* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd |
535 | * tell the Host which top-level page we changed, and the index of the entry we | 559 | * to set a middle-level entry when PAE is activated. |
536 | * changed. */ | 560 | * Again, we set the entry then tell the Host which page we changed, |
561 | * and the index of the entry we changed. */ | ||
562 | #ifdef CONFIG_X86_PAE | ||
563 | static void lguest_set_pud(pud_t *pudp, pud_t pudval) | ||
564 | { | ||
565 | native_set_pud(pudp, pudval); | ||
566 | |||
567 | /* 32-byte-aligned pdpt address and the index. */ | ||
568 | lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0, | ||
569 | (__pa(pudp) & 0x1F) / sizeof(pud_t)); | ||
570 | } | ||
571 | |||
537 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 572 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
538 | { | 573 | { |
539 | *pmdp = pmdval; | 574 | native_set_pmd(pmdp, pmdval); |
540 | lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, | 575 | lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, |
541 | (__pa(pmdp) & (PAGE_SIZE - 1)) / 4); | 576 | (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); |
542 | } | 577 | } |
578 | #else | ||
579 | |||
580 | /* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not | ||
581 | * activated. */ | ||
582 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
583 | { | ||
584 | native_set_pmd(pmdp, pmdval); | ||
585 | lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK, | ||
586 | (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); | ||
587 | } | ||
588 | #endif | ||
543 | 589 | ||
544 | /* There are a couple of legacy places where the kernel sets a PTE, but we | 590 | /* There are a couple of legacy places where the kernel sets a PTE, but we |
545 | * don't know the top level any more. This is useless for us, since we don't | 591 | * don't know the top level any more. This is useless for us, since we don't |
@@ -552,11 +598,31 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
552 | * which brings boot back to 0.25 seconds. */ | 598 | * which brings boot back to 0.25 seconds. */ |
553 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) | 599 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) |
554 | { | 600 | { |
555 | *ptep = pteval; | 601 | native_set_pte(ptep, pteval); |
602 | if (cr3_changed) | ||
603 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | ||
604 | } | ||
605 | |||
606 | #ifdef CONFIG_X86_PAE | ||
607 | static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
608 | { | ||
609 | native_set_pte_atomic(ptep, pte); | ||
556 | if (cr3_changed) | 610 | if (cr3_changed) |
557 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | 611 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); |
558 | } | 612 | } |
559 | 613 | ||
614 | void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
615 | { | ||
616 | native_pte_clear(mm, addr, ptep); | ||
617 | lguest_pte_update(mm, addr, ptep); | ||
618 | } | ||
619 | |||
620 | void lguest_pmd_clear(pmd_t *pmdp) | ||
621 | { | ||
622 | lguest_set_pmd(pmdp, __pmd(0)); | ||
623 | } | ||
624 | #endif | ||
625 | |||
560 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on | 626 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on |
561 | * native page table operations. On native hardware you can set a new page | 627 | * native page table operations. On native hardware you can set a new page |
562 | * table entry whenever you want, but if you want to remove one you have to do | 628 | * table entry whenever you want, but if you want to remove one you have to do |
@@ -628,13 +694,12 @@ static void __init lguest_init_IRQ(void) | |||
628 | { | 694 | { |
629 | unsigned int i; | 695 | unsigned int i; |
630 | 696 | ||
631 | for (i = 0; i < LGUEST_IRQS; i++) { | 697 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
632 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
633 | /* Some systems map "vectors" to interrupts weirdly. Lguest has | 698 | /* Some systems map "vectors" to interrupts weirdly. Lguest has |
634 | * a straightforward 1 to 1 mapping, so force that here. */ | 699 | * a straightforward 1 to 1 mapping, so force that here. */ |
635 | __get_cpu_var(vector_irq)[vector] = i; | 700 | __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; |
636 | if (vector != SYSCALL_VECTOR) | 701 | if (i != SYSCALL_VECTOR) |
637 | set_intr_gate(vector, interrupt[i]); | 702 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); |
638 | } | 703 | } |
639 | /* This call is required to set up for 4k stacks, where we have | 704 | /* This call is required to set up for 4k stacks, where we have |
640 | * separate stacks for hard and soft interrupts. */ | 705 | * separate stacks for hard and soft interrupts. */ |
@@ -973,10 +1038,10 @@ static void lguest_restart(char *reason) | |||
973 | * | 1038 | * |
974 | * Our current solution is to allow the paravirt back end to optionally patch | 1039 | * Our current solution is to allow the paravirt back end to optionally patch |
975 | * over the indirect calls to replace them with something more efficient. We | 1040 | * over the indirect calls to replace them with something more efficient. We |
976 | * patch the four most commonly called functions: disable interrupts, enable | 1041 | * patch two of the simplest of the most commonly called functions: disable |
977 | * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 | 1042 | * interrupts and save interrupts. We usually have 6 or 10 bytes to patch |
978 | * bytes to patch into: the Guest versions of these operations are small enough | 1043 | * into: the Guest versions of these operations are small enough that we can |
979 | * that we can fit comfortably. | 1044 | * fit comfortably. |
980 | * | 1045 | * |
981 | * First we need assembly templates of each of the patchable Guest operations, | 1046 | * First we need assembly templates of each of the patchable Guest operations, |
982 | * and these are in i386_head.S. */ | 1047 | * and these are in i386_head.S. */ |
@@ -987,8 +1052,6 @@ static const struct lguest_insns | |||
987 | const char *start, *end; | 1052 | const char *start, *end; |
988 | } lguest_insns[] = { | 1053 | } lguest_insns[] = { |
989 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, | 1054 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, |
990 | [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, | ||
991 | [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, | ||
992 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, | 1055 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
993 | }; | 1056 | }; |
994 | 1057 | ||
@@ -1026,6 +1089,7 @@ __init void lguest_init(void) | |||
1026 | pv_info.name = "lguest"; | 1089 | pv_info.name = "lguest"; |
1027 | pv_info.paravirt_enabled = 1; | 1090 | pv_info.paravirt_enabled = 1; |
1028 | pv_info.kernel_rpl = 1; | 1091 | pv_info.kernel_rpl = 1; |
1092 | pv_info.shared_kernel_pmd = 1; | ||
1029 | 1093 | ||
1030 | /* We set up all the lguest overrides for sensitive operations. These | 1094 | /* We set up all the lguest overrides for sensitive operations. These |
1031 | * are detailed with the operations themselves. */ | 1095 | * are detailed with the operations themselves. */ |
@@ -1033,9 +1097,9 @@ __init void lguest_init(void) | |||
1033 | /* interrupt-related operations */ | 1097 | /* interrupt-related operations */ |
1034 | pv_irq_ops.init_IRQ = lguest_init_IRQ; | 1098 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
1035 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1099 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); |
1036 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); | 1100 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
1037 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); | 1101 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); |
1038 | pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); | 1102 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); |
1039 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1103 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1040 | 1104 | ||
1041 | /* init-time operations */ | 1105 | /* init-time operations */ |
@@ -1071,6 +1135,12 @@ __init void lguest_init(void) | |||
1071 | pv_mmu_ops.set_pte = lguest_set_pte; | 1135 | pv_mmu_ops.set_pte = lguest_set_pte; |
1072 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; | 1136 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; |
1073 | pv_mmu_ops.set_pmd = lguest_set_pmd; | 1137 | pv_mmu_ops.set_pmd = lguest_set_pmd; |
1138 | #ifdef CONFIG_X86_PAE | ||
1139 | pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic; | ||
1140 | pv_mmu_ops.pte_clear = lguest_pte_clear; | ||
1141 | pv_mmu_ops.pmd_clear = lguest_pmd_clear; | ||
1142 | pv_mmu_ops.set_pud = lguest_set_pud; | ||
1143 | #endif | ||
1074 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | 1144 | pv_mmu_ops.read_cr2 = lguest_read_cr2; |
1075 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | 1145 | pv_mmu_ops.read_cr3 = lguest_read_cr3; |
1076 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; | 1146 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index f79541989471..a9c8cfe61cd4 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -46,10 +46,64 @@ ENTRY(lguest_entry) | |||
46 | .globl lgstart_##name; .globl lgend_##name | 46 | .globl lgstart_##name; .globl lgend_##name |
47 | 47 | ||
48 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) | 48 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) |
49 | LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) | ||
50 | LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) | ||
51 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) | 49 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) |
52 | /*:*/ | 50 | |
51 | /*G:033 But using those wrappers is inefficient (we'll see why that doesn't | ||
52 | * matter for save_fl and irq_disable later). If we write our routines | ||
53 | * carefully in assembler, we can avoid clobbering any registers and avoid | ||
54 | * jumping through the wrapper functions. | ||
55 | * | ||
56 | * I skipped over our first piece of assembler, but this one is worth studying | ||
57 | * in a bit more detail so I'll describe it in easy stages. First, the routine | ||
58 | * to enable interrupts: */ | ||
59 | ENTRY(lg_irq_enable) | ||
60 | /* The reverse of irq_disable, this sets lguest_data.irq_enabled to | ||
61 | * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ | ||
62 | movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled | ||
63 | /* But now we need to check if the Host wants to know: there might have | ||
64 | * been interrupts waiting to be delivered, in which case it will have | ||
65 | * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we | ||
66 | * jump to send_interrupts, otherwise we're done. */ | ||
67 | testl $0, lguest_data+LGUEST_DATA_irq_pending | ||
68 | jnz send_interrupts | ||
69 | /* One cool thing about x86 is that you can do many things without using | ||
70 | * a register. In this case, the normal path hasn't needed to save or | ||
71 | * restore any registers at all! */ | ||
72 | ret | ||
73 | send_interrupts: | ||
74 | /* OK, now we need a register: eax is used for the hypercall number, | ||
75 | * which is LHCALL_SEND_INTERRUPTS. | ||
76 | * | ||
77 | * We used not to bother with this pending detection at all, which was | ||
78 | * much simpler. Sooner or later the Host would realize it had to | ||
79 | * send us an interrupt. But that turns out to make performance 7 | ||
80 | * times worse on a simple tcp benchmark. So now we do this the hard | ||
81 | * way. */ | ||
82 | pushl %eax | ||
83 | movl $LHCALL_SEND_INTERRUPTS, %eax | ||
84 | /* This is a vmcall instruction (same thing that KVM uses). Older | ||
85 | * assembler versions might not know the "vmcall" instruction, so we | ||
86 | * create one manually here. */ | ||
87 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | ||
88 | popl %eax | ||
89 | ret | ||
90 | |||
91 | /* Finally, the "popf" or "restore flags" routine. The %eax register holds the | ||
92 | * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're | ||
93 | * enabling interrupts again, if it's 0 we're leaving them off. */ | ||
94 | ENTRY(lg_restore_fl) | ||
95 | /* This is just "lguest_data.irq_enabled = flags;" */ | ||
96 | movl %eax, lguest_data+LGUEST_DATA_irq_enabled | ||
97 | /* Now, if the %eax value has enabled interrupts and | ||
98 | * lguest_data.irq_pending is set, we want to tell the Host so it can | ||
99 | * deliver any outstanding interrupts. Fortunately, both values will | ||
100 | * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" | ||
101 | * instruction will AND them together for us. If both are set, we | ||
102 | * jump to send_interrupts. */ | ||
103 | testl lguest_data+LGUEST_DATA_irq_pending, %eax | ||
104 | jnz send_interrupts | ||
105 | /* Again, the normal path has used no extra registers. Clever, huh? */ | ||
106 | ret | ||
53 | 107 | ||
54 | /* These demark the EIP range where host should never deliver interrupts. */ | 108 | /* These demark the EIP range where host should never deliver interrupts. */ |
55 | .global lguest_noirq_start | 109 | .global lguest_noirq_start |
diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c index 3981a466c779..c1accea8cb56 100644 --- a/arch/xtensa/kernel/module.c +++ b/arch/xtensa/kernel/module.c | |||
@@ -34,8 +34,6 @@ void *module_alloc(unsigned long size) | |||
34 | void module_free(struct module *mod, void *module_region) | 34 | void module_free(struct module *mod, void *module_region) |
35 | { | 35 | { |
36 | vfree(module_region); | 36 | vfree(module_region); |
37 | /* FIXME: If module_region == mod->init_region, trim exception | ||
38 | table entries. */ | ||
39 | } | 37 | } |
40 | 38 | ||
41 | int module_frob_arch_sections(Elf32_Ehdr *hdr, | 39 | int module_frob_arch_sections(Elf32_Ehdr *hdr, |