diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-05-02 13:27:12 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-05-02 13:27:12 -0400 |
commit | d4f7a2c18e59e0304a1c733589ce14fc02fec1bd (patch) | |
tree | 99c64275f7eb50925aad71a74ae083ececdbb795 /arch/i386 | |
parent | a6c4e076ee4c1ea670e4faa55814e63dd08e3f29 (diff) |
[PATCH] i386: Relocate VDSO ELF headers to match mapped location with COMPAT_VDSO
Some versions of libc can't deal with a VDSO which doesn't have its
ELF headers matching its mapped address. COMPAT_VDSO maps the VDSO at
a specific system-wide fixed address. Previously this was all done at
build time, on the grounds that the fixed VDSO address is always at
the top of the address space. However, a hypervisor may reserve some
of that address space, pushing the fixmap address down.
This patch does the adjustment dynamically at runtime, depending on
the runtime location of the VDSO fixmap.
[ Patch has been through several hands: Jan Beulich wrote the original
version; Zach reworked it, and Jeremy converted it to relocate phdrs
as well as sections. ]
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Zachary Amsden <zach@vmware.com>
Cc: "Jan Beulich" <JBeulich@novell.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/kernel/entry.S | 4 | ||||
-rw-r--r-- | arch/i386/kernel/sysenter.c | 158 | ||||
-rw-r--r-- | arch/i386/mm/pgtable.c | 6 |
3 files changed, 149 insertions, 19 deletions
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index c61c6b67e856..e901952dff37 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -305,16 +305,12 @@ sysenter_past_esp: | |||
305 | pushl $(__USER_CS) | 305 | pushl $(__USER_CS) |
306 | CFI_ADJUST_CFA_OFFSET 4 | 306 | CFI_ADJUST_CFA_OFFSET 4 |
307 | /*CFI_REL_OFFSET cs, 0*/ | 307 | /*CFI_REL_OFFSET cs, 0*/ |
308 | #ifndef CONFIG_COMPAT_VDSO | ||
309 | /* | 308 | /* |
310 | * Push current_thread_info()->sysenter_return to the stack. | 309 | * Push current_thread_info()->sysenter_return to the stack. |
311 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 310 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
312 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 311 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
313 | */ | 312 | */ |
314 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | 313 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
315 | #else | ||
316 | pushl $SYSENTER_RETURN | ||
317 | #endif | ||
318 | CFI_ADJUST_CFA_OFFSET 4 | 314 | CFI_ADJUST_CFA_OFFSET 4 |
319 | CFI_REL_OFFSET eip, 0 | 315 | CFI_REL_OFFSET eip, 0 |
320 | 316 | ||
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 13ca54a85a1c..e5a958379ac9 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
25 | #include <asm/elf.h> | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Should the kernel map a VDSO page into processes and pass its | 28 | * Should the kernel map a VDSO page into processes and pass its |
@@ -46,6 +47,129 @@ __setup("vdso=", vdso_setup); | |||
46 | 47 | ||
47 | extern asmlinkage void sysenter_entry(void); | 48 | extern asmlinkage void sysenter_entry(void); |
48 | 49 | ||
50 | #ifdef CONFIG_COMPAT_VDSO | ||
51 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | ||
52 | unsigned offset, unsigned size) | ||
53 | { | ||
54 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
55 | unsigned nsym = size / sizeof(*sym); | ||
56 | unsigned i; | ||
57 | |||
58 | for(i = 0; i < nsym; i++, sym++) { | ||
59 | if (sym->st_shndx == SHN_UNDEF || | ||
60 | sym->st_shndx == SHN_ABS) | ||
61 | continue; /* skip */ | ||
62 | |||
63 | if (sym->st_shndx > SHN_LORESERVE) { | ||
64 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
65 | sym->st_shndx); | ||
66 | continue; | ||
67 | } | ||
68 | |||
69 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
70 | case STT_OBJECT: | ||
71 | case STT_FUNC: | ||
72 | case STT_SECTION: | ||
73 | case STT_FILE: | ||
74 | sym->st_value += VDSO_HIGH_BASE; | ||
75 | } | ||
76 | } | ||
77 | } | ||
78 | |||
79 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
80 | { | ||
81 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
82 | |||
83 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
84 | switch(dyn->d_tag) { | ||
85 | case DT_PLTGOT: | ||
86 | case DT_HASH: | ||
87 | case DT_STRTAB: | ||
88 | case DT_SYMTAB: | ||
89 | case DT_RELA: | ||
90 | case DT_INIT: | ||
91 | case DT_FINI: | ||
92 | case DT_REL: | ||
93 | case DT_DEBUG: | ||
94 | case DT_JMPREL: | ||
95 | case DT_VERSYM: | ||
96 | case DT_VERDEF: | ||
97 | case DT_VERNEED: | ||
98 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
99 | /* definitely pointers needing relocation */ | ||
100 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
101 | break; | ||
102 | |||
103 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
104 | case DT_LOOS ... DT_HIOS-1: | ||
105 | /* Tags above DT_ENCODING are pointers if | ||
106 | they're even */ | ||
107 | if (dyn->d_tag >= DT_ENCODING && | ||
108 | (dyn->d_tag & 1) == 0) | ||
109 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
110 | break; | ||
111 | |||
112 | case DT_VERDEFNUM: | ||
113 | case DT_VERNEEDNUM: | ||
114 | case DT_FLAGS_1: | ||
115 | case DT_RELACOUNT: | ||
116 | case DT_RELCOUNT: | ||
117 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
118 | /* definitely not pointers */ | ||
119 | break; | ||
120 | |||
121 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
122 | case DT_HIOS ... DT_VALRNGLO-1: | ||
123 | default: | ||
124 | if (dyn->d_tag > DT_ENCODING) | ||
125 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
126 | dyn->d_tag); | ||
127 | break; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
132 | { | ||
133 | Elf32_Phdr *phdr; | ||
134 | Elf32_Shdr *shdr; | ||
135 | int i; | ||
136 | |||
137 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | ||
138 | !elf_check_arch(ehdr) || | ||
139 | ehdr->e_type != ET_DYN); | ||
140 | |||
141 | ehdr->e_entry += VDSO_HIGH_BASE; | ||
142 | |||
143 | /* rebase phdrs */ | ||
144 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
145 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
146 | phdr[i].p_vaddr += VDSO_HIGH_BASE; | ||
147 | |||
148 | /* relocate dynamic stuff */ | ||
149 | if (phdr[i].p_type == PT_DYNAMIC) | ||
150 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
151 | } | ||
152 | |||
153 | /* rebase sections */ | ||
154 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
155 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
156 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
157 | continue; | ||
158 | |||
159 | shdr[i].sh_addr += VDSO_HIGH_BASE; | ||
160 | |||
161 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
162 | shdr[i].sh_type == SHT_DYNSYM) | ||
163 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
164 | shdr[i].sh_size); | ||
165 | } | ||
166 | } | ||
167 | #else | ||
168 | static inline void relocate_vdso(Elf32_Ehdr *ehdr) | ||
169 | { | ||
170 | } | ||
171 | #endif /* COMPAT_VDSO */ | ||
172 | |||
49 | void enable_sep_cpu(void) | 173 | void enable_sep_cpu(void) |
50 | { | 174 | { |
51 | int cpu = get_cpu(); | 175 | int cpu = get_cpu(); |
@@ -75,6 +199,9 @@ static struct page *syscall_pages[1]; | |||
75 | int __init sysenter_setup(void) | 199 | int __init sysenter_setup(void) |
76 | { | 200 | { |
77 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 201 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
202 | const void *vsyscall; | ||
203 | size_t vsyscall_len; | ||
204 | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 205 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 206 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 207 | #ifdef CONFIG_COMPAT_VDSO |
@@ -83,23 +210,23 @@ int __init sysenter_setup(void) | |||
83 | #endif | 210 | #endif |
84 | 211 | ||
85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 212 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
86 | memcpy(syscall_page, | 213 | vsyscall = &vsyscall_int80_start; |
87 | &vsyscall_int80_start, | 214 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
88 | &vsyscall_int80_end - &vsyscall_int80_start); | 215 | } else { |
89 | return 0; | 216 | vsyscall = &vsyscall_sysenter_start; |
217 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; | ||
90 | } | 218 | } |
91 | 219 | ||
92 | memcpy(syscall_page, | 220 | memcpy(syscall_page, vsyscall, vsyscall_len); |
93 | &vsyscall_sysenter_start, | 221 | relocate_vdso(syscall_page); |
94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | ||
95 | 222 | ||
96 | return 0; | 223 | return 0; |
97 | } | 224 | } |
98 | 225 | ||
99 | #ifndef CONFIG_COMPAT_VDSO | ||
100 | /* Defined in vsyscall-sysenter.S */ | 226 | /* Defined in vsyscall-sysenter.S */ |
101 | extern void SYSENTER_RETURN; | 227 | extern void SYSENTER_RETURN; |
102 | 228 | ||
229 | #ifdef __HAVE_ARCH_GATE_AREA | ||
103 | /* Setup a VMA at program startup for the vsyscall page */ | 230 | /* Setup a VMA at program startup for the vsyscall page */ |
104 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | 231 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) |
105 | { | 232 | { |
@@ -159,4 +286,17 @@ int in_gate_area_no_task(unsigned long addr) | |||
159 | { | 286 | { |
160 | return 0; | 287 | return 0; |
161 | } | 288 | } |
162 | #endif | 289 | #else /* !__HAVE_ARCH_GATE_AREA */ |
290 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | ||
291 | { | ||
292 | /* | ||
293 | * If not creating userspace VMA, simply set vdso to point to | ||
294 | * fixmap page. | ||
295 | */ | ||
296 | current->mm->context.vdso = (void *)VDSO_HIGH_BASE; | ||
297 | current_thread_info()->sysenter_return = | ||
298 | (void *)VDSO_SYM(&SYSENTER_RETURN); | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | #endif /* __HAVE_ARCH_GATE_AREA */ | ||
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index fa0cfbd551e1..99c09edc3dbb 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | static int fixmaps; | 146 | static int fixmaps; |
147 | #ifndef CONFIG_COMPAT_VDSO | ||
148 | unsigned long __FIXADDR_TOP = 0xfffff000; | 147 | unsigned long __FIXADDR_TOP = 0xfffff000; |
149 | EXPORT_SYMBOL(__FIXADDR_TOP); | 148 | EXPORT_SYMBOL(__FIXADDR_TOP); |
150 | #endif | ||
151 | 149 | ||
152 | void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) | 150 | void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) |
153 | { | 151 | { |
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve) | |||
173 | BUG_ON(fixmaps > 0); | 171 | BUG_ON(fixmaps > 0); |
174 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", | 172 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", |
175 | (int)-reserve); | 173 | (int)-reserve); |
176 | #ifdef CONFIG_COMPAT_VDSO | ||
177 | BUG_ON(reserve != 0); | ||
178 | #else | ||
179 | __FIXADDR_TOP = -reserve - PAGE_SIZE; | 174 | __FIXADDR_TOP = -reserve - PAGE_SIZE; |
180 | __VMALLOC_RESERVE += reserve; | 175 | __VMALLOC_RESERVE += reserve; |
181 | #endif | ||
182 | } | 176 | } |
183 | 177 | ||
184 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 178 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |