diff options
author | Andy Lutomirski <luto@mit.edu> | 2011-07-13 09:24:16 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-07-14 20:57:09 -0400 |
commit | 98eedc3a9dbf90cecb91093d2a7fa083942b7d13 (patch) | |
tree | 0ed9320faed2d62caea337b978d5216a7fea55a8 /Documentation/vDSO/parse_vdso.c | |
parent | 574c44fa8fa6262ffd5939789ef51a6e98ed62d7 (diff) |
Document the vDSO and add a reference parser
It turns out that parsing the vDSO is nontrivial if you don't already
have an ELF dynamic loader around. So document it in Documentation/ABI
and add a reference CC0-licenced parser.
This code is dedicated to Go issue 1933:
http://code.google.com/p/go/issues/detail?id=1933
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/a315a9514cd71bcf29436cc31e35aada21a5ff21.1310563276.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'Documentation/vDSO/parse_vdso.c')
-rw-r--r-- | Documentation/vDSO/parse_vdso.c | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c new file mode 100644 index 000000000000..85870208edcf --- /dev/null +++ b/Documentation/vDSO/parse_vdso.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /* | ||
2 | * parse_vdso.c: Linux reference vDSO parser | ||
3 | * Written by Andrew Lutomirski, 2011. | ||
4 | * | ||
5 | * This code is meant to be linked in to various programs that run on Linux. | ||
6 | * As such, it is available with as few restrictions as possible. This file | ||
7 | * is licensed under the Creative Commons Zero License, version 1.0, | ||
8 | * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode | ||
9 | * | ||
10 | * The vDSO is a regular ELF DSO that the kernel maps into user space when | ||
11 | * it starts a program. It works equally well in statically and dynamically | ||
12 | * linked binaries. | ||
13 | * | ||
14 | * This code is tested on x86_64. In principle it should work on any 64-bit | ||
15 | * architecture that has a vDSO. | ||
16 | */ | ||
17 | |||
18 | #include <stdbool.h> | ||
19 | #include <stdint.h> | ||
20 | #include <string.h> | ||
21 | #include <elf.h> | ||
22 | |||
23 | /* | ||
24 | * To use this vDSO parser, first call one of the vdso_init_* functions. | ||
25 | * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR | ||
26 | * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv. | ||
27 | * Then call vdso_sym for each symbol you want. For example, to look up | ||
28 | * gettimeofday on x86_64, use: | ||
29 | * | ||
30 | * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday"); | ||
31 | * or | ||
32 | * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); | ||
33 | * | ||
34 | * vdso_sym will return 0 if the symbol doesn't exist or if the init function | ||
35 | * failed or was not called. vdso_sym is a little slow, so its return value | ||
36 | * should be cached. | ||
37 | * | ||
38 | * vdso_sym is threadsafe; the init functions are not. | ||
39 | * | ||
40 | * These are the prototypes: | ||
41 | */ | ||
42 | extern void vdso_init_from_auxv(void *auxv); | ||
43 | extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); | ||
44 | extern void *vdso_sym(const char *version, const char *name); | ||
45 | |||
46 | |||
47 | /* And here's the code. */ | ||
48 | |||
49 | #ifndef __x86_64__ | ||
50 | # error Not yet ported to non-x86_64 architectures | ||
51 | #endif | ||
52 | |||
/*
 * Parsed view of the vDSO image.  It is filled in by
 * vdso_init_from_sysinfo_ehdr() and read by vdso_sym() and
 * vdso_match_version().  The other fields are only meaningful while
 * `valid` is true.
 */
static struct vdso_info
{
	/* Set to true as the last step of a successful init. */
	bool valid;

	/* Where the image lives */
	uintptr_t load_addr;	/* address of the ELF header */
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Dynamic symbol table and the string table it indexes */
	Elf64_Sym *symtab;
	const char *symstrings;

	/* DT_HASH contents: bucket heads, chain links and their counts */
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Symbol versioning; versym is cleared when there is no verdef */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;
71 | |||
/*
 * Symbol-name hash used by DT_HASH tables, as given in the ELF
 * specification.
 *
 * name: NUL-terminated string to hash.
 * Returns the hash; the top four bits of the low 32 are always folded
 * back in and cleared, so the result fits in 28 bits.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	unsigned long hash = 0;

	for (; *name != '\0'; name++) {
		unsigned long top;

		hash = (hash << 4) + *name;

		/* Fold bits 28..31 back into the low byte, then drop them. */
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= top >> 24;
		hash &= ~top;
	}

	return hash;
}
85 | |||
86 | void vdso_init_from_sysinfo_ehdr(uintptr_t base) | ||
87 | { | ||
88 | size_t i; | ||
89 | bool found_vaddr = false; | ||
90 | |||
91 | vdso_info.valid = false; | ||
92 | |||
93 | vdso_info.load_addr = base; | ||
94 | |||
95 | Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; | ||
96 | Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); | ||
97 | Elf64_Dyn *dyn = 0; | ||
98 | |||
99 | /* | ||
100 | * We need two things from the segment table: the load offset | ||
101 | * and the dynamic table. | ||
102 | */ | ||
103 | for (i = 0; i < hdr->e_phnum; i++) | ||
104 | { | ||
105 | if (pt[i].p_type == PT_LOAD && !found_vaddr) { | ||
106 | found_vaddr = true; | ||
107 | vdso_info.load_offset = base | ||
108 | + (uintptr_t)pt[i].p_offset | ||
109 | - (uintptr_t)pt[i].p_vaddr; | ||
110 | } else if (pt[i].p_type == PT_DYNAMIC) { | ||
111 | dyn = (Elf64_Dyn*)(base + pt[i].p_offset); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | if (!found_vaddr || !dyn) | ||
116 | return; /* Failed */ | ||
117 | |||
118 | /* | ||
119 | * Fish out the useful bits of the dynamic table. | ||
120 | */ | ||
121 | Elf64_Word *hash = 0; | ||
122 | vdso_info.symstrings = 0; | ||
123 | vdso_info.symtab = 0; | ||
124 | vdso_info.versym = 0; | ||
125 | vdso_info.verdef = 0; | ||
126 | for (i = 0; dyn[i].d_tag != DT_NULL; i++) { | ||
127 | switch (dyn[i].d_tag) { | ||
128 | case DT_STRTAB: | ||
129 | vdso_info.symstrings = (const char *) | ||
130 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
131 | + vdso_info.load_offset); | ||
132 | break; | ||
133 | case DT_SYMTAB: | ||
134 | vdso_info.symtab = (Elf64_Sym *) | ||
135 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
136 | + vdso_info.load_offset); | ||
137 | break; | ||
138 | case DT_HASH: | ||
139 | hash = (Elf64_Word *) | ||
140 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
141 | + vdso_info.load_offset); | ||
142 | break; | ||
143 | case DT_VERSYM: | ||
144 | vdso_info.versym = (Elf64_Versym *) | ||
145 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
146 | + vdso_info.load_offset); | ||
147 | break; | ||
148 | case DT_VERDEF: | ||
149 | vdso_info.verdef = (Elf64_Verdef *) | ||
150 | ((uintptr_t)dyn[i].d_un.d_ptr | ||
151 | + vdso_info.load_offset); | ||
152 | break; | ||
153 | } | ||
154 | } | ||
155 | if (!vdso_info.symstrings || !vdso_info.symtab || !hash) | ||
156 | return; /* Failed */ | ||
157 | |||
158 | if (!vdso_info.verdef) | ||
159 | vdso_info.versym = 0; | ||
160 | |||
161 | /* Parse the hash table header. */ | ||
162 | vdso_info.nbucket = hash[0]; | ||
163 | vdso_info.nchain = hash[1]; | ||
164 | vdso_info.bucket = &hash[2]; | ||
165 | vdso_info.chain = &hash[vdso_info.nbucket + 2]; | ||
166 | |||
167 | /* That's all we need. */ | ||
168 | vdso_info.valid = true; | ||
169 | } | ||
170 | |||
171 | static bool vdso_match_version(Elf64_Versym ver, | ||
172 | const char *name, Elf64_Word hash) | ||
173 | { | ||
174 | /* | ||
175 | * This is a helper function to check if the version indexed by | ||
176 | * ver matches name (which hashes to hash). | ||
177 | * | ||
178 | * The version definition table is a mess, and I don't know how | ||
179 | * to do this in better than linear time without allocating memory | ||
180 | * to build an index. I also don't know why the table has | ||
181 | * variable size entries in the first place. | ||
182 | * | ||
183 | * For added fun, I can't find a comprehensible specification of how | ||
184 | * to parse all the weird flags in the table. | ||
185 | * | ||
186 | * So I just parse the whole table every time. | ||
187 | */ | ||
188 | |||
189 | /* First step: find the version definition */ | ||
190 | ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ | ||
191 | Elf64_Verdef *def = vdso_info.verdef; | ||
192 | while(true) { | ||
193 | if ((def->vd_flags & VER_FLG_BASE) == 0 | ||
194 | && (def->vd_ndx & 0x7fff) == ver) | ||
195 | break; | ||
196 | |||
197 | if (def->vd_next == 0) | ||
198 | return false; /* No definition. */ | ||
199 | |||
200 | def = (Elf64_Verdef *)((char *)def + def->vd_next); | ||
201 | } | ||
202 | |||
203 | /* Now figure out whether it matches. */ | ||
204 | Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); | ||
205 | return def->vd_hash == hash | ||
206 | && !strcmp(name, vdso_info.symstrings + aux->vda_name); | ||
207 | } | ||
208 | |||
209 | void *vdso_sym(const char *version, const char *name) | ||
210 | { | ||
211 | unsigned long ver_hash; | ||
212 | if (!vdso_info.valid) | ||
213 | return 0; | ||
214 | |||
215 | ver_hash = elf_hash(version); | ||
216 | Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; | ||
217 | |||
218 | for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { | ||
219 | Elf64_Sym *sym = &vdso_info.symtab[chain]; | ||
220 | |||
221 | /* Check for a defined global or weak function w/ right name. */ | ||
222 | if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) | ||
223 | continue; | ||
224 | if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && | ||
225 | ELF64_ST_BIND(sym->st_info) != STB_WEAK) | ||
226 | continue; | ||
227 | if (sym->st_shndx == SHN_UNDEF) | ||
228 | continue; | ||
229 | if (strcmp(name, vdso_info.symstrings + sym->st_name)) | ||
230 | continue; | ||
231 | |||
232 | /* Check symbol version. */ | ||
233 | if (vdso_info.versym | ||
234 | && !vdso_match_version(vdso_info.versym[chain], | ||
235 | version, ver_hash)) | ||
236 | continue; | ||
237 | |||
238 | return (void *)(vdso_info.load_offset + sym->st_value); | ||
239 | } | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | void vdso_init_from_auxv(void *auxv) | ||
245 | { | ||
246 | Elf64_auxv_t *elf_auxv = auxv; | ||
247 | for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) | ||
248 | { | ||
249 | if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { | ||
250 | vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); | ||
251 | return; | ||
252 | } | ||
253 | } | ||
254 | |||
255 | vdso_info.valid = false; | ||
256 | } | ||