aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/kernel/vdso.c
diff options
context:
space:
mode:
authorNathan Lynch <nathan_lynch@mentor.com>2015-03-25 14:15:08 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2015-03-27 18:20:45 -0400
commitecf99a439105ebd0a507af1a9cd901a2e166bf9a (patch)
treec24009696e0cd598a68eac95d7a512ff84e3da0a /arch/arm/kernel/vdso.c
parent8512287a8165592466cb9cb347ba94892e9c56a5 (diff)
ARM: 8331/1: VDSO initialization, mapping, and synchronization
Initialize the VDSO page list at boot, install the VDSO mapping at exec time, and update the data page during timer ticks. This code is not built if CONFIG_VDSO is not enabled. Account for the VDSO length when randomizing the offset from the stack. The [vdso] and [vvar] pages are placed immediately following the sigpage with separate _install_special_mapping calls. We want to "penalize" systems lacking the arch timer as little as possible. Previous versions of this code installed the VDSO unconditionally and unmodified, making it a measurably slower way for glibc to invoke the real syscalls on such systems. E.g. calling gettimeofday via glibc goes from ~560ns to ~630ns on i.MX6Q. If we can indicate to glibc that the time-related APIs in the VDSO are not accelerated, glibc can continue to invoke the syscalls directly instead of dispatching through the VDSO only to fall back to the slow path. Thus, if the architected timer is unusable for whatever reason, patch the VDSO at boot time so that symbol lookups for gettimeofday and clock_gettime return NULL. (This is similar to what powerpc does and borrows code from there.) This allows glibc to perform the syscall directly instead of passing control to the VDSO, which minimizes the penalty. In my measurements the time taken for a gettimeofday call via glibc goes from ~560ns to ~580ns (again on i.MX6Q), and this is solely due to adding a test and branch to glibc's gettimeofday syscall wrapper. An alternative to patching the VDSO at boot would be to not install the VDSO at all when the arch timer isn't usable. Another alternative is to include a separate "dummy" vdso.so without gettimeofday and clock_gettime, which would be selected at boot time. Either of these would get cumbersome if the VDSO were to gain support for an API such as getcpu which is unrelated to arch timer support. Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/kernel/vdso.c')
-rw-r--r--arch/arm/kernel/vdso.c337
1 files changed, 337 insertions, 0 deletions
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
new file mode 100644
index 000000000000..0d31d3ccab81
--- /dev/null
+++ b/arch/arm/kernel/vdso.c
@@ -0,0 +1,337 @@
1/*
2 * Adapted from arm64 version.
3 *
4 * Copyright (C) 2012 ARM Limited
5 * Copyright (C) 2015 Mentor Graphics Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/elf.h>
21#include <linux/err.h>
22#include <linux/kernel.h>
23#include <linux/mm.h>
24#include <linux/of.h>
25#include <linux/printk.h>
26#include <linux/slab.h>
27#include <linux/timekeeper_internal.h>
28#include <linux/vmalloc.h>
29#include <asm/arch_timer.h>
30#include <asm/barrier.h>
31#include <asm/cacheflush.h>
32#include <asm/page.h>
33#include <asm/vdso.h>
34#include <asm/vdso_datapage.h>
35#include <clocksource/arm_arch_timer.h>
36
37#define MAX_SYMNAME 64
38
39static struct page **vdso_text_pagelist;
40
41/* Total number of pages needed for the data and text portions of the VDSO. */
42unsigned int vdso_total_pages __read_mostly;
43
44/*
45 * The VDSO data page.
46 */
47static union vdso_data_store vdso_data_store __page_aligned_data;
48static struct vdso_data *vdso_data = &vdso_data_store.data;
49
50static struct page *vdso_data_page;
51static struct vm_special_mapping vdso_data_mapping = {
52 .name = "[vvar]",
53 .pages = &vdso_data_page,
54};
55
56static struct vm_special_mapping vdso_text_mapping = {
57 .name = "[vdso]",
58};
59
60struct elfinfo {
61 Elf32_Ehdr *hdr; /* ptr to ELF */
62 Elf32_Sym *dynsym; /* ptr to .dynsym section */
63 unsigned long dynsymsize; /* size of .dynsym section */
64 char *dynstr; /* ptr to .dynstr section */
65};
66
67/* Cached result of boot-time check for whether the arch timer exists,
68 * and if so, whether the virtual counter is useable.
69 */
70static bool cntvct_ok __read_mostly;
71
72static bool __init cntvct_functional(void)
73{
74 struct device_node *np;
75 bool ret = false;
76
77 if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
78 goto out;
79
80 /* The arm_arch_timer core should export
81 * arch_timer_use_virtual or similar so we don't have to do
82 * this.
83 */
84 np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
85 if (!np)
86 goto out_put;
87
88 if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
89 goto out_put;
90
91 ret = true;
92
93out_put:
94 of_node_put(np);
95out:
96 return ret;
97}
98
99static void * __init find_section(Elf32_Ehdr *ehdr, const char *name,
100 unsigned long *size)
101{
102 Elf32_Shdr *sechdrs;
103 unsigned int i;
104 char *secnames;
105
106 /* Grab section headers and strings so we can tell who is who */
107 sechdrs = (void *)ehdr + ehdr->e_shoff;
108 secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
109
110 /* Find the section they want */
111 for (i = 1; i < ehdr->e_shnum; i++) {
112 if (strcmp(secnames + sechdrs[i].sh_name, name) == 0) {
113 if (size)
114 *size = sechdrs[i].sh_size;
115 return (void *)ehdr + sechdrs[i].sh_offset;
116 }
117 }
118
119 if (size)
120 *size = 0;
121 return NULL;
122}
123
124static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
125{
126 unsigned int i;
127
128 for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
129 char name[MAX_SYMNAME], *c;
130
131 if (lib->dynsym[i].st_name == 0)
132 continue;
133 strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
134 MAX_SYMNAME);
135 c = strchr(name, '@');
136 if (c)
137 *c = 0;
138 if (strcmp(symname, name) == 0)
139 return &lib->dynsym[i];
140 }
141 return NULL;
142}
143
144static void __init vdso_nullpatch_one(struct elfinfo *lib, const char *symname)
145{
146 Elf32_Sym *sym;
147
148 sym = find_symbol(lib, symname);
149 if (!sym)
150 return;
151
152 sym->st_name = 0;
153}
154
155static void __init patch_vdso(void *ehdr)
156{
157 struct elfinfo einfo;
158
159 einfo = (struct elfinfo) {
160 .hdr = ehdr,
161 };
162
163 einfo.dynsym = find_section(einfo.hdr, ".dynsym", &einfo.dynsymsize);
164 einfo.dynstr = find_section(einfo.hdr, ".dynstr", NULL);
165
166 /* If the virtual counter is absent or non-functional we don't
167 * want programs to incur the slight additional overhead of
168 * dispatching through the VDSO only to fall back to syscalls.
169 */
170 if (!cntvct_ok) {
171 vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
172 vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
173 }
174}
175
176static int __init vdso_init(void)
177{
178 unsigned int text_pages;
179 int i;
180
181 if (memcmp(&vdso_start, "\177ELF", 4)) {
182 pr_err("VDSO is not a valid ELF object!\n");
183 return -ENOEXEC;
184 }
185
186 text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
187 pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
188
189 /* Allocate the VDSO text pagelist */
190 vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
191 GFP_KERNEL);
192 if (vdso_text_pagelist == NULL)
193 return -ENOMEM;
194
195 /* Grab the VDSO data page. */
196 vdso_data_page = virt_to_page(vdso_data);
197
198 /* Grab the VDSO text pages. */
199 for (i = 0; i < text_pages; i++) {
200 struct page *page;
201
202 page = virt_to_page(&vdso_start + i * PAGE_SIZE);
203 vdso_text_pagelist[i] = page;
204 }
205
206 vdso_text_mapping.pages = vdso_text_pagelist;
207
208 vdso_total_pages = 1; /* for the data/vvar page */
209 vdso_total_pages += text_pages;
210
211 cntvct_ok = cntvct_functional();
212
213 patch_vdso(&vdso_start);
214
215 return 0;
216}
217arch_initcall(vdso_init);
218
219static int install_vvar(struct mm_struct *mm, unsigned long addr)
220{
221 struct vm_area_struct *vma;
222
223 vma = _install_special_mapping(mm, addr, PAGE_SIZE,
224 VM_READ | VM_MAYREAD,
225 &vdso_data_mapping);
226
227 return IS_ERR(vma) ? PTR_ERR(vma) : 0;
228}
229
230/* assumes mmap_sem is write-locked */
231void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
232{
233 struct vm_area_struct *vma;
234 unsigned long len;
235
236 mm->context.vdso = 0;
237
238 if (vdso_text_pagelist == NULL)
239 return;
240
241 if (install_vvar(mm, addr))
242 return;
243
244 /* Account for vvar page. */
245 addr += PAGE_SIZE;
246 len = (vdso_total_pages - 1) << PAGE_SHIFT;
247
248 vma = _install_special_mapping(mm, addr, len,
249 VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
250 &vdso_text_mapping);
251
252 if (!IS_ERR(vma))
253 mm->context.vdso = addr;
254}
255
256static void vdso_write_begin(struct vdso_data *vdata)
257{
258 ++vdso_data->seq_count;
259 smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */
260}
261
262static void vdso_write_end(struct vdso_data *vdata)
263{
264 smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */
265 ++vdso_data->seq_count;
266}
267
268static bool tk_is_cntvct(const struct timekeeper *tk)
269{
270 if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
271 return false;
272
273 if (strcmp(tk->tkr.clock->name, "arch_sys_counter") != 0)
274 return false;
275
276 return true;
277}
278
279/**
280 * update_vsyscall - update the vdso data page
281 *
282 * Increment the sequence counter, making it odd, indicating to
283 * userspace that an update is in progress. Update the fields used
284 * for coarse clocks and, if the architected system timer is in use,
285 * the fields used for high precision clocks. Increment the sequence
286 * counter again, making it even, indicating to userspace that the
287 * update is finished.
288 *
289 * Userspace is expected to sample seq_count before reading any other
290 * fields from the data page. If seq_count is odd, userspace is
291 * expected to wait until it becomes even. After copying data from
292 * the page, userspace must sample seq_count again; if it has changed
293 * from its previous value, userspace must retry the whole sequence.
294 *
295 * Calls to update_vsyscall are serialized by the timekeeping core.
296 */
297void update_vsyscall(struct timekeeper *tk)
298{
299 struct timespec xtime_coarse;
300 struct timespec64 *wtm = &tk->wall_to_monotonic;
301
302 if (!cntvct_ok) {
303 /* The entry points have been zeroed, so there is no
304 * point in updating the data page.
305 */
306 return;
307 }
308
309 vdso_write_begin(vdso_data);
310
311 xtime_coarse = __current_kernel_time();
312 vdso_data->tk_is_cntvct = tk_is_cntvct(tk);
313 vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec;
314 vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec;
315 vdso_data->wtm_clock_sec = wtm->tv_sec;
316 vdso_data->wtm_clock_nsec = wtm->tv_nsec;
317
318 if (vdso_data->tk_is_cntvct) {
319 vdso_data->cs_cycle_last = tk->tkr.cycle_last;
320 vdso_data->xtime_clock_sec = tk->xtime_sec;
321 vdso_data->xtime_clock_snsec = tk->tkr.xtime_nsec;
322 vdso_data->cs_mult = tk->tkr.mult;
323 vdso_data->cs_shift = tk->tkr.shift;
324 vdso_data->cs_mask = tk->tkr.mask;
325 }
326
327 vdso_write_end(vdso_data);
328
329 flush_dcache_page(virt_to_page(vdso_data));
330}
331
332void update_vsyscall_tz(void)
333{
334 vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
335 vdso_data->tz_dsttime = sys_tz.tz_dsttime;
336 flush_dcache_page(virt_to_page(vdso_data));
337}