diff options
Diffstat (limited to 'arch/i386/kernel/sysenter.c')
-rw-r--r-- | arch/i386/kernel/sysenter.c | 269 |
1 files changed, 226 insertions, 43 deletions
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 13ca54a85a1c..ff4ee6f3326b 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -22,16 +22,26 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
25 | #include <asm/elf.h> | ||
26 | #include <asm/tlbflush.h> | ||
27 | |||
/*
 * Values held by vdso_enabled.  The numeric values are fixed (0, 1, 2).
 */
enum {
	VDSO_DISABLED,		/* 0 */
	VDSO_ENABLED,		/* 1 */
	VDSO_COMPAT,		/* 2: vDSO lives at the fixed VDSO_HIGH_BASE */
};

/* Initial value of vdso_enabled, selected by the kernel configuration. */
#ifndef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	VDSO_ENABLED
#else
#define VDSO_DEFAULT	VDSO_COMPAT
#endif
25 | 39 | ||
26 | /* | 40 | /* |
27 | * Should the kernel map a VDSO page into processes and pass its | 41 | * Should the kernel map a VDSO page into processes and pass its |
28 | * address down to glibc upon exec()? | 42 | * address down to glibc upon exec()? |
29 | */ | 43 | */ |
30 | #ifdef CONFIG_PARAVIRT | 44 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; |
31 | unsigned int __read_mostly vdso_enabled = 0; | ||
32 | #else | ||
33 | unsigned int __read_mostly vdso_enabled = 1; | ||
34 | #endif | ||
35 | 45 | ||
36 | EXPORT_SYMBOL_GPL(vdso_enabled); | 46 | EXPORT_SYMBOL_GPL(vdso_enabled); |
37 | 47 | ||
@@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup); | |||
46 | 56 | ||
47 | extern asmlinkage void sysenter_entry(void); | 57 | extern asmlinkage void sysenter_entry(void); |
48 | 58 | ||
59 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | ||
60 | unsigned offset, unsigned size) | ||
61 | { | ||
62 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
63 | unsigned nsym = size / sizeof(*sym); | ||
64 | unsigned i; | ||
65 | |||
66 | for(i = 0; i < nsym; i++, sym++) { | ||
67 | if (sym->st_shndx == SHN_UNDEF || | ||
68 | sym->st_shndx == SHN_ABS) | ||
69 | continue; /* skip */ | ||
70 | |||
71 | if (sym->st_shndx > SHN_LORESERVE) { | ||
72 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
73 | sym->st_shndx); | ||
74 | continue; | ||
75 | } | ||
76 | |||
77 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
78 | case STT_OBJECT: | ||
79 | case STT_FUNC: | ||
80 | case STT_SECTION: | ||
81 | case STT_FILE: | ||
82 | sym->st_value += VDSO_HIGH_BASE; | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
88 | { | ||
89 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
90 | |||
91 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
92 | switch(dyn->d_tag) { | ||
93 | case DT_PLTGOT: | ||
94 | case DT_HASH: | ||
95 | case DT_STRTAB: | ||
96 | case DT_SYMTAB: | ||
97 | case DT_RELA: | ||
98 | case DT_INIT: | ||
99 | case DT_FINI: | ||
100 | case DT_REL: | ||
101 | case DT_DEBUG: | ||
102 | case DT_JMPREL: | ||
103 | case DT_VERSYM: | ||
104 | case DT_VERDEF: | ||
105 | case DT_VERNEED: | ||
106 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
107 | /* definitely pointers needing relocation */ | ||
108 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
109 | break; | ||
110 | |||
111 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
112 | case DT_LOOS ... DT_HIOS-1: | ||
113 | /* Tags above DT_ENCODING are pointers if | ||
114 | they're even */ | ||
115 | if (dyn->d_tag >= DT_ENCODING && | ||
116 | (dyn->d_tag & 1) == 0) | ||
117 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
118 | break; | ||
119 | |||
120 | case DT_VERDEFNUM: | ||
121 | case DT_VERNEEDNUM: | ||
122 | case DT_FLAGS_1: | ||
123 | case DT_RELACOUNT: | ||
124 | case DT_RELCOUNT: | ||
125 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
126 | /* definitely not pointers */ | ||
127 | break; | ||
128 | |||
129 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
130 | case DT_HIOS ... DT_VALRNGLO-1: | ||
131 | default: | ||
132 | if (dyn->d_tag > DT_ENCODING) | ||
133 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
134 | dyn->d_tag); | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
140 | { | ||
141 | Elf32_Phdr *phdr; | ||
142 | Elf32_Shdr *shdr; | ||
143 | int i; | ||
144 | |||
145 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | ||
146 | !elf_check_arch(ehdr) || | ||
147 | ehdr->e_type != ET_DYN); | ||
148 | |||
149 | ehdr->e_entry += VDSO_HIGH_BASE; | ||
150 | |||
151 | /* rebase phdrs */ | ||
152 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
153 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
154 | phdr[i].p_vaddr += VDSO_HIGH_BASE; | ||
155 | |||
156 | /* relocate dynamic stuff */ | ||
157 | if (phdr[i].p_type == PT_DYNAMIC) | ||
158 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
159 | } | ||
160 | |||
161 | /* rebase sections */ | ||
162 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
163 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
164 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
165 | continue; | ||
166 | |||
167 | shdr[i].sh_addr += VDSO_HIGH_BASE; | ||
168 | |||
169 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
170 | shdr[i].sh_type == SHT_DYNSYM) | ||
171 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
172 | shdr[i].sh_size); | ||
173 | } | ||
174 | } | ||
175 | |||
49 | void enable_sep_cpu(void) | 176 | void enable_sep_cpu(void) |
50 | { | 177 | { |
51 | int cpu = get_cpu(); | 178 | int cpu = get_cpu(); |
@@ -56,14 +183,33 @@ void enable_sep_cpu(void) | |||
56 | return; | 183 | return; |
57 | } | 184 | } |
58 | 185 | ||
59 | tss->ss1 = __KERNEL_CS; | 186 | tss->x86_tss.ss1 = __KERNEL_CS; |
60 | tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; | 187 | tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; |
61 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 188 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
62 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); | 189 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); |
63 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); | 190 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); |
64 | put_cpu(); | 191 | put_cpu(); |
65 | } | 192 | } |
66 | 193 | ||
194 | static struct vm_area_struct gate_vma; | ||
195 | |||
196 | static int __init gate_vma_init(void) | ||
197 | { | ||
198 | gate_vma.vm_mm = NULL; | ||
199 | gate_vma.vm_start = FIXADDR_USER_START; | ||
200 | gate_vma.vm_end = FIXADDR_USER_END; | ||
201 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
202 | gate_vma.vm_page_prot = __P101; | ||
203 | /* | ||
204 | * Make sure the vDSO gets into every core dump. | ||
205 | * Dumping its contents makes post-mortem fully interpretable later | ||
206 | * without matching up the same kernel and hardware config to see | ||
207 | * what PC values meant. | ||
208 | */ | ||
209 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
67 | /* | 213 | /* |
68 | * These symbols are defined by vsyscall.o to mark the bounds | 214 | * These symbols are defined by vsyscall.o to mark the bounds |
69 | * of the ELF DSO images included therein. | 215 | * of the ELF DSO images included therein. |
@@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end; | |||
72 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 218 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
73 | static struct page *syscall_pages[1]; | 219 | static struct page *syscall_pages[1]; |
74 | 220 | ||
221 | static void map_compat_vdso(int map) | ||
222 | { | ||
223 | static int vdso_mapped; | ||
224 | |||
225 | if (map == vdso_mapped) | ||
226 | return; | ||
227 | |||
228 | vdso_mapped = map; | ||
229 | |||
230 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, | ||
231 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
232 | |||
233 | /* flush stray tlbs */ | ||
234 | flush_tlb_all(); | ||
235 | } | ||
236 | |||
75 | int __init sysenter_setup(void) | 237 | int __init sysenter_setup(void) |
76 | { | 238 | { |
77 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 239 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
240 | const void *vsyscall; | ||
241 | size_t vsyscall_len; | ||
242 | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 243 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 244 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 245 | gate_vma_init(); |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); | 246 | |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 247 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | ||
84 | 248 | ||
85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 249 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
86 | memcpy(syscall_page, | 250 | vsyscall = &vsyscall_int80_start; |
87 | &vsyscall_int80_start, | 251 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
88 | &vsyscall_int80_end - &vsyscall_int80_start); | 252 | } else { |
89 | return 0; | 253 | vsyscall = &vsyscall_sysenter_start; |
254 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; | ||
90 | } | 255 | } |
91 | 256 | ||
92 | memcpy(syscall_page, | 257 | memcpy(syscall_page, vsyscall, vsyscall_len); |
93 | &vsyscall_sysenter_start, | 258 | relocate_vdso(syscall_page); |
94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | ||
95 | 259 | ||
96 | return 0; | 260 | return 0; |
97 | } | 261 | } |
98 | 262 | ||
99 | #ifndef CONFIG_COMPAT_VDSO | ||
100 | /* Defined in vsyscall-sysenter.S */ | 263 | /* Defined in vsyscall-sysenter.S */ |
101 | extern void SYSENTER_RETURN; | 264 | extern void SYSENTER_RETURN; |
102 | 265 | ||
@@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | |||
105 | { | 268 | { |
106 | struct mm_struct *mm = current->mm; | 269 | struct mm_struct *mm = current->mm; |
107 | unsigned long addr; | 270 | unsigned long addr; |
108 | int ret; | 271 | int ret = 0; |
272 | bool compat; | ||
109 | 273 | ||
110 | down_write(&mm->mmap_sem); | 274 | down_write(&mm->mmap_sem); |
111 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
112 | if (IS_ERR_VALUE(addr)) { | ||
113 | ret = addr; | ||
114 | goto up_fail; | ||
115 | } | ||
116 | 275 | ||
117 | /* | 276 | /* Test compat mode once here, in case someone |
118 | * MAYWRITE to allow gdb to COW and set breakpoints | 277 | changes it via sysctl */ |
119 | * | 278 | compat = (vdso_enabled == VDSO_COMPAT); |
120 | * Make sure the vDSO gets into every core dump. | 279 | |
121 | * Dumping its contents makes post-mortem fully interpretable later | 280 | map_compat_vdso(compat); |
122 | * without matching up the same kernel and hardware config to see | 281 | |
123 | * what PC values meant. | 282 | if (compat) |
124 | */ | 283 | addr = VDSO_HIGH_BASE; |
125 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 284 | else { |
126 | VM_READ|VM_EXEC| | 285 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
127 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 286 | if (IS_ERR_VALUE(addr)) { |
128 | VM_ALWAYSDUMP, | 287 | ret = addr; |
129 | syscall_pages); | 288 | goto up_fail; |
130 | if (ret) | 289 | } |
131 | goto up_fail; | 290 | |
291 | /* | ||
292 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
293 | * | ||
294 | * Make sure the vDSO gets into every core dump. | ||
295 | * Dumping its contents makes post-mortem fully | ||
296 | * interpretable later without matching up the same | ||
297 | * kernel and hardware config to see what PC values | ||
298 | * meant. | ||
299 | */ | ||
300 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | ||
301 | VM_READ|VM_EXEC| | ||
302 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
303 | VM_ALWAYSDUMP, | ||
304 | syscall_pages); | ||
305 | |||
306 | if (ret) | ||
307 | goto up_fail; | ||
308 | } | ||
132 | 309 | ||
133 | current->mm->context.vdso = (void *)addr; | 310 | current->mm->context.vdso = (void *)addr; |
134 | current_thread_info()->sysenter_return = | 311 | current_thread_info()->sysenter_return = |
135 | (void *)VDSO_SYM(&SYSENTER_RETURN); | 312 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
136 | up_fail: | 313 | |
314 | up_fail: | ||
137 | up_write(&mm->mmap_sem); | 315 | up_write(&mm->mmap_sem); |
316 | |||
138 | return ret; | 317 | return ret; |
139 | } | 318 | } |
140 | 319 | ||
@@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
147 | 326 | ||
148 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 327 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
149 | { | 328 | { |
329 | struct mm_struct *mm = tsk->mm; | ||
330 | |||
331 | /* Check to see if this task was created in compat vdso mode */ | ||
332 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
333 | return &gate_vma; | ||
150 | return NULL; | 334 | return NULL; |
151 | } | 335 | } |
152 | 336 | ||
@@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr) | |||
159 | { | 343 | { |
160 | return 0; | 344 | return 0; |
161 | } | 345 | } |
162 | #endif | ||