aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 20:27:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 20:27:47 -0400
commit98a96f202203fecad65b44449077c695686ad4db (patch)
treee3544d5323fcf570607c180d6395b2113b54e007
parent5637a2a3e99375a04189ee0896aae985582a2290 (diff)
parent53b884ac3745353de220d92ef792515c3ae692f0 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar: "Further simplifications and improvements to the VDSO code, by Andy Lutomirski" * 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86_64/vsyscall: Fix warn_bad_vsyscall log output x86/vdso: Set VM_MAYREAD for the vvar vma x86, vdso: Get rid of the fake section mechanism x86, vdso: Move the vvar area before the vdso text
-rw-r--r--arch/x86/include/asm/vdso.h18
-rw-r--r--arch/x86/kernel/vsyscall_64.c8
-rw-r--r--arch/x86/vdso/Makefile16
-rw-r--r--arch/x86/vdso/vdso-fakesections.c21
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S44
-rw-r--r--arch/x86/vdso/vdso2c.c128
-rw-r--r--arch/x86/vdso/vdso2c.h227
-rw-r--r--arch/x86/vdso/vma.c22
8 files changed, 193 insertions, 291 deletions
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 30be253dd283..8021bd28c0f1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -18,15 +18,15 @@ struct vdso_image {
18 18
19 unsigned long alt, alt_len; 19 unsigned long alt, alt_len;
20 20
21 unsigned long sym_end_mapping; /* Total size of the mapping */ 21 long sym_vvar_start; /* Negative offset to the vvar area */
22 22
23 unsigned long sym_vvar_page; 23 long sym_vvar_page;
24 unsigned long sym_hpet_page; 24 long sym_hpet_page;
25 unsigned long sym_VDSO32_NOTE_MASK; 25 long sym_VDSO32_NOTE_MASK;
26 unsigned long sym___kernel_sigreturn; 26 long sym___kernel_sigreturn;
27 unsigned long sym___kernel_rt_sigreturn; 27 long sym___kernel_rt_sigreturn;
28 unsigned long sym___kernel_vsyscall; 28 long sym___kernel_vsyscall;
29 unsigned long sym_VDSO32_SYSENTER_RETURN; 29 long sym_VDSO32_SYSENTER_RETURN;
30}; 30};
31 31
32#ifdef CONFIG_X86_64 32#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index ea5b5709aa76..e1e1e80fc6a6 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -81,10 +81,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
81 if (!show_unhandled_signals) 81 if (!show_unhandled_signals)
82 return; 82 return;
83 83
84 pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", 84 printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
85 level, current->comm, task_pid_nr(current), 85 level, current->comm, task_pid_nr(current),
86 message, regs->ip, regs->cs, 86 message, regs->ip, regs->cs,
87 regs->sp, regs->ax, regs->si, regs->di); 87 regs->sp, regs->ax, regs->si, regs->di);
88} 88}
89 89
90static int addr_to_vsyscall_nr(unsigned long addr) 90static int addr_to_vsyscall_nr(unsigned long addr)
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 61b04fe36e66..5a4affe025e8 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -10,7 +10,7 @@ VDSO32-$(CONFIG_X86_32) := y
10VDSO32-$(CONFIG_COMPAT) := y 10VDSO32-$(CONFIG_COMPAT) := y
11 11
12# files to link into the vdso 12# files to link into the vdso
13vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o 13vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
14 14
15# files to link into kernel 15# files to link into kernel
16obj-y += vma.o 16obj-y += vma.o
@@ -37,7 +37,8 @@ vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
37obj-y += $(vdso_img_objs) 37obj-y += $(vdso_img_objs)
38targets += $(vdso_img_cfiles) 38targets += $(vdso_img_cfiles)
39targets += $(vdso_img_sodbg) 39targets += $(vdso_img_sodbg)
40.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) 40.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
41 $(vdso_img-y:%=$(obj)/vdso%.so)
41 42
42export CPPFLAGS_vdso.lds += -P -C 43export CPPFLAGS_vdso.lds += -P -C
43 44
@@ -54,10 +55,10 @@ hostprogs-y += vdso2c
54 55
55quiet_cmd_vdso2c = VDSO2C $@ 56quiet_cmd_vdso2c = VDSO2C $@
56define cmd_vdso2c 57define cmd_vdso2c
57 $(obj)/vdso2c $< $@ 58 $(obj)/vdso2c $< $(<:%.dbg=%) $@
58endef 59endef
59 60
60$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE 61$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
61 $(call if_changed,vdso2c) 62 $(call if_changed,vdso2c)
62 63
63# 64#
@@ -113,6 +114,10 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
113 114
114targets += vdsox32.lds $(vobjx32s-y) 115targets += vdsox32.lds $(vobjx32s-y)
115 116
117$(obj)/%.so: OBJCOPYFLAGS := -S
118$(obj)/%.so: $(obj)/%.so.dbg
119 $(call if_changed,objcopy)
120
116$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE 121$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
117 $(call if_changed,vdso) 122 $(call if_changed,vdso)
118 123
@@ -134,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
134 139
135targets += vdso32/vdso32.lds 140targets += vdso32/vdso32.lds
136targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) 141targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
137targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o 142targets += vdso32/vclock_gettime.o
138 143
139$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) 144$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
140 145
@@ -156,7 +161,6 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
156$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ 161$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
157 $(obj)/vdso32/vdso32.lds \ 162 $(obj)/vdso32/vdso32.lds \
158 $(obj)/vdso32/vclock_gettime.o \ 163 $(obj)/vdso32/vclock_gettime.o \
159 $(obj)/vdso32/vdso-fakesections.o \
160 $(obj)/vdso32/note.o \ 164 $(obj)/vdso32/note.o \
161 $(obj)/vdso32/%.o 165 $(obj)/vdso32/%.o
162 $(call if_changed,vdso) 166 $(call if_changed,vdso)
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
deleted file mode 100644
index aa5fbfab20a5..000000000000
--- a/arch/x86/vdso/vdso-fakesections.c
+++ /dev/null
@@ -1,21 +0,0 @@
1/*
2 * Copyright 2014 Andy Lutomirski
3 * Subject to the GNU Public License, v.2
4 *
5 * String table for loadable section headers. See vdso2c.h for why
6 * this exists.
7 */
8
9const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
10 ".hash\0"
11 ".dynsym\0"
12 ".dynstr\0"
13 ".gnu.version\0"
14 ".gnu.version_d\0"
15 ".dynamic\0"
16 ".rodata\0"
17 ".fake_shstrtab\0" /* Yay, self-referential code. */
18 ".note\0"
19 ".eh_frame_hdr\0"
20 ".eh_frame\0"
21 ".text";
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 9197544eea9a..de2c921025f5 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -18,6 +18,25 @@
18 18
19SECTIONS 19SECTIONS
20{ 20{
21 /*
22 * User/kernel shared data is before the vDSO. This may be a little
23 * uglier than putting it after the vDSO, but it avoids issues with
24 * non-allocatable things that dangle past the end of the PT_LOAD
25 * segment.
26 */
27
28 vvar_start = . - 2 * PAGE_SIZE;
29 vvar_page = vvar_start;
30
31 /* Place all vvars at the offsets in asm/vvar.h. */
32#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
33#define __VVAR_KERNEL_LDS
34#include <asm/vvar.h>
35#undef __VVAR_KERNEL_LDS
36#undef EMIT_VVAR
37
38 hpet_page = vvar_start + PAGE_SIZE;
39
21 . = SIZEOF_HEADERS; 40 . = SIZEOF_HEADERS;
22 41
23 .hash : { *(.hash) } :text 42 .hash : { *(.hash) } :text
@@ -74,31 +93,6 @@ SECTIONS
74 .altinstructions : { *(.altinstructions) } :text 93 .altinstructions : { *(.altinstructions) } :text
75 .altinstr_replacement : { *(.altinstr_replacement) } :text 94 .altinstr_replacement : { *(.altinstr_replacement) } :text
76 95
77 /*
78 * The remainder of the vDSO consists of special pages that are
79 * shared between the kernel and userspace. It needs to be at the
80 * end so that it doesn't overlap the mapping of the actual
81 * vDSO image.
82 */
83
84 . = ALIGN(PAGE_SIZE);
85 vvar_page = .;
86
87 /* Place all vvars at the offsets in asm/vvar.h. */
88#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
89#define __VVAR_KERNEL_LDS
90#include <asm/vvar.h>
91#undef __VVAR_KERNEL_LDS
92#undef EMIT_VVAR
93
94 . = vvar_page + PAGE_SIZE;
95
96 hpet_page = .;
97 . = . + PAGE_SIZE;
98
99 . = ALIGN(PAGE_SIZE);
100 end_mapping = .;
101
102 /DISCARD/ : { 96 /DISCARD/ : {
103 *(.discard) 97 *(.discard)
104 *(.discard.*) 98 *(.discard.*)
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 238dbe82776e..8627db24a7f6 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -1,3 +1,53 @@
1/*
2 * vdso2c - A vdso image preparation tool
3 * Copyright (c) 2014 Andy Lutomirski and others
4 * Licensed under the GPL v2
5 *
6 * vdso2c requires stripped and unstripped input. It would be trivial
7 * to fully strip the input in here, but, for reasons described below,
8 * we need to write a section table. Doing this is more or less
9 * equivalent to dropping all non-allocatable sections, but it's
10 * easier to let objcopy handle that instead of doing it ourselves.
11 * If we ever need to do something fancier than what objcopy provides,
12 * it would be straightforward to add here.
13 *
 14 * We keep a section table for a few reasons:
15 *
16 * The Go runtime had a couple of bugs: it would read the section
17 * table to try to figure out how many dynamic symbols there were (it
18 * shouldn't have looked at the section table at all) and, if there
 19 * were no SHT_DYNSYM section table entry, it would use an
20 * uninitialized value for the number of symbols. An empty DYNSYM
21 * table would work, but I see no reason not to write a valid one (and
22 * keep full performance for old Go programs). This hack is only
23 * needed on x86_64.
24 *
25 * The bug was introduced on 2012-08-31 by:
26 * https://code.google.com/p/go/source/detail?r=56ea40aac72b
27 * and was fixed on 2014-06-13 by:
28 * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
29 *
30 * Binutils has issues debugging the vDSO: it reads the section table to
31 * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
32 * would break build-id if we removed the section table. Binutils
33 * also requires that shstrndx != 0. See:
34 * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
35 *
36 * elfutils might not look for PT_NOTE if there is a section table at
37 * all. I don't know whether this matters for any practical purpose.
38 *
39 * For simplicity, rather than hacking up a partial section table, we
40 * just write a mostly complete one. We omit non-dynamic symbols,
41 * though, since they're rather large.
42 *
43 * Once binutils gets fixed, we might be able to drop this for all but
44 * the 64-bit vdso, since build-id only works in kernel RPMs, and
45 * systems that update to new enough kernel RPMs will likely update
46 * binutils in sync. build-id has never worked for home-built kernel
47 * RPMs without manual symlinking, and I suspect that no one ever does
48 * that.
49 */
50
1#include <inttypes.h> 51#include <inttypes.h>
2#include <stdint.h> 52#include <stdint.h>
3#include <unistd.h> 53#include <unistd.h>
@@ -20,9 +70,9 @@ const char *outfilename;
20 70
21/* Symbols that we need in vdso2c. */ 71/* Symbols that we need in vdso2c. */
22enum { 72enum {
73 sym_vvar_start,
23 sym_vvar_page, 74 sym_vvar_page,
24 sym_hpet_page, 75 sym_hpet_page,
25 sym_end_mapping,
26 sym_VDSO_FAKE_SECTION_TABLE_START, 76 sym_VDSO_FAKE_SECTION_TABLE_START,
27 sym_VDSO_FAKE_SECTION_TABLE_END, 77 sym_VDSO_FAKE_SECTION_TABLE_END,
28}; 78};
@@ -38,9 +88,9 @@ struct vdso_sym {
38}; 88};
39 89
40struct vdso_sym required_syms[] = { 90struct vdso_sym required_syms[] = {
91 [sym_vvar_start] = {"vvar_start", true},
41 [sym_vvar_page] = {"vvar_page", true}, 92 [sym_vvar_page] = {"vvar_page", true},
42 [sym_hpet_page] = {"hpet_page", true}, 93 [sym_hpet_page] = {"hpet_page", true},
43 [sym_end_mapping] = {"end_mapping", true},
44 [sym_VDSO_FAKE_SECTION_TABLE_START] = { 94 [sym_VDSO_FAKE_SECTION_TABLE_START] = {
45 "VDSO_FAKE_SECTION_TABLE_START", false 95 "VDSO_FAKE_SECTION_TABLE_START", false
46 }, 96 },
@@ -61,7 +111,8 @@ static void fail(const char *format, ...)
61 va_start(ap, format); 111 va_start(ap, format);
62 fprintf(stderr, "Error: "); 112 fprintf(stderr, "Error: ");
63 vfprintf(stderr, format, ap); 113 vfprintf(stderr, format, ap);
64 unlink(outfilename); 114 if (outfilename)
115 unlink(outfilename);
65 exit(1); 116 exit(1);
66 va_end(ap); 117 va_end(ap);
67} 118}
@@ -96,9 +147,11 @@ extern void bad_put_le(void);
96 147
97#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) 148#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
98 149
99#define BITSFUNC3(name, bits) name##bits 150#define BITSFUNC3(name, bits, suffix) name##bits##suffix
100#define BITSFUNC2(name, bits) BITSFUNC3(name, bits) 151#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
101#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS) 152#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
153
154#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
102 155
103#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x 156#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
104#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) 157#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
@@ -112,30 +165,53 @@ extern void bad_put_le(void);
112#include "vdso2c.h" 165#include "vdso2c.h"
113#undef ELF_BITS 166#undef ELF_BITS
114 167
115static void go(void *addr, size_t len, FILE *outfile, const char *name) 168static void go(void *raw_addr, size_t raw_len,
169 void *stripped_addr, size_t stripped_len,
170 FILE *outfile, const char *name)
116{ 171{
117 Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr; 172 Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
118 173
119 if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { 174 if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
120 go64(addr, len, outfile, name); 175 go64(raw_addr, raw_len, stripped_addr, stripped_len,
176 outfile, name);
121 } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { 177 } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
122 go32(addr, len, outfile, name); 178 go32(raw_addr, raw_len, stripped_addr, stripped_len,
179 outfile, name);
123 } else { 180 } else {
124 fail("unknown ELF class\n"); 181 fail("unknown ELF class\n");
125 } 182 }
126} 183}
127 184
185static void map_input(const char *name, void **addr, size_t *len, int prot)
186{
187 off_t tmp_len;
188
189 int fd = open(name, O_RDONLY);
190 if (fd == -1)
191 err(1, "%s", name);
192
193 tmp_len = lseek(fd, 0, SEEK_END);
194 if (tmp_len == (off_t)-1)
195 err(1, "lseek");
196 *len = (size_t)tmp_len;
197
198 *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
199 if (*addr == MAP_FAILED)
200 err(1, "mmap");
201
202 close(fd);
203}
204
128int main(int argc, char **argv) 205int main(int argc, char **argv)
129{ 206{
130 int fd; 207 size_t raw_len, stripped_len;
131 off_t len; 208 void *raw_addr, *stripped_addr;
132 void *addr;
133 FILE *outfile; 209 FILE *outfile;
134 char *name, *tmp; 210 char *name, *tmp;
135 int namelen; 211 int namelen;
136 212
137 if (argc != 3) { 213 if (argc != 4) {
138 printf("Usage: vdso2c INPUT OUTPUT\n"); 214 printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
139 return 1; 215 return 1;
140 } 216 }
141 217
@@ -143,7 +219,7 @@ int main(int argc, char **argv)
143 * Figure out the struct name. If we're writing to a .so file, 219 * Figure out the struct name. If we're writing to a .so file,
 144 * generate raw output instead. 220 * generate raw output instead.
145 */ 221 */
146 name = strdup(argv[2]); 222 name = strdup(argv[3]);
147 namelen = strlen(name); 223 namelen = strlen(name);
148 if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { 224 if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
149 name = NULL; 225 name = NULL;
@@ -159,26 +235,18 @@ int main(int argc, char **argv)
159 *tmp = '_'; 235 *tmp = '_';
160 } 236 }
161 237
162 fd = open(argv[1], O_RDONLY); 238 map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
163 if (fd == -1) 239 map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
164 err(1, "%s", argv[1]);
165
166 len = lseek(fd, 0, SEEK_END);
167 if (len == (off_t)-1)
168 err(1, "lseek");
169
170 addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
171 if (addr == MAP_FAILED)
172 err(1, "mmap");
173 240
174 outfilename = argv[2]; 241 outfilename = argv[3];
175 outfile = fopen(outfilename, "w"); 242 outfile = fopen(outfilename, "w");
176 if (!outfile) 243 if (!outfile)
177 err(1, "%s", argv[2]); 244 err(1, "%s", argv[2]);
178 245
179 go(addr, (size_t)len, outfile, name); 246 go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
180 247
181 munmap(addr, len); 248 munmap(raw_addr, raw_len);
249 munmap(stripped_addr, stripped_len);
182 fclose(outfile); 250 fclose(outfile);
183 251
184 return 0; 252 return 0;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index 11b65d4f9414..fd57829b30d8 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -4,139 +4,23 @@
4 * are built for 32-bit userspace. 4 * are built for 32-bit userspace.
5 */ 5 */
6 6
7/* 7static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
8 * We're writing a section table for a few reasons: 8 void *stripped_addr, size_t stripped_len,
9 *
10 * The Go runtime had a couple of bugs: it would read the section
11 * table to try to figure out how many dynamic symbols there were (it
12 * shouldn't have looked at the section table at all) and, if there
 13 * were no SHT_DYNSYM section table entry, it would use an
14 * uninitialized value for the number of symbols. An empty DYNSYM
15 * table would work, but I see no reason not to write a valid one (and
16 * keep full performance for old Go programs). This hack is only
17 * needed on x86_64.
18 *
19 * The bug was introduced on 2012-08-31 by:
20 * https://code.google.com/p/go/source/detail?r=56ea40aac72b
21 * and was fixed on 2014-06-13 by:
22 * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
23 *
24 * Binutils has issues debugging the vDSO: it reads the section table to
25 * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
26 * would break build-id if we removed the section table. Binutils
27 * also requires that shstrndx != 0. See:
28 * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
29 *
30 * elfutils might not look for PT_NOTE if there is a section table at
31 * all. I don't know whether this matters for any practical purpose.
32 *
33 * For simplicity, rather than hacking up a partial section table, we
34 * just write a mostly complete one. We omit non-dynamic symbols,
35 * though, since they're rather large.
36 *
37 * Once binutils gets fixed, we might be able to drop this for all but
38 * the 64-bit vdso, since build-id only works in kernel RPMs, and
39 * systems that update to new enough kernel RPMs will likely update
40 * binutils in sync. build-id has never worked for home-built kernel
41 * RPMs without manual symlinking, and I suspect that no one ever does
42 * that.
43 */
44struct BITSFUNC(fake_sections)
45{
46 ELF(Shdr) *table;
47 unsigned long table_offset;
48 int count, max_count;
49
50 int in_shstrndx;
51 unsigned long shstr_offset;
52 const char *shstrtab;
53 size_t shstrtab_len;
54
55 int out_shstrndx;
56};
57
58static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out,
59 const char *name)
60{
61 const char *outname = out->shstrtab;
62 while (outname - out->shstrtab < out->shstrtab_len) {
63 if (!strcmp(name, outname))
64 return (outname - out->shstrtab) + out->shstr_offset;
65 outname += strlen(outname) + 1;
66 }
67
68 if (*name)
69 printf("Warning: could not find output name \"%s\"\n", name);
70 return out->shstr_offset + out->shstrtab_len - 1; /* Use a null. */
71}
72
73static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out)
74{
75 if (!out->in_shstrndx)
76 fail("didn't find the fake shstrndx\n");
77
78 memset(out->table, 0, out->max_count * sizeof(ELF(Shdr)));
79
80 if (out->max_count < 1)
81 fail("we need at least two fake output sections\n");
82
83 PUT_LE(&out->table[0].sh_type, SHT_NULL);
84 PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, ""));
85
86 out->count = 1;
87}
88
89static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out,
90 int in_idx, const ELF(Shdr) *in,
91 const char *name)
92{
93 uint64_t flags = GET_LE(&in->sh_flags);
94
95 bool copy = flags & SHF_ALLOC &&
96 (GET_LE(&in->sh_size) ||
97 (GET_LE(&in->sh_type) != SHT_RELA &&
98 GET_LE(&in->sh_type) != SHT_REL)) &&
99 strcmp(name, ".altinstructions") &&
100 strcmp(name, ".altinstr_replacement");
101
102 if (!copy)
103 return;
104
105 if (out->count >= out->max_count)
106 fail("too many copied sections (max = %d)\n", out->max_count);
107
108 if (in_idx == out->in_shstrndx)
109 out->out_shstrndx = out->count;
110
111 out->table[out->count] = *in;
112 PUT_LE(&out->table[out->count].sh_name,
113 BITSFUNC(find_shname)(out, name));
114
115 /* elfutils requires that a strtab have the correct type. */
116 if (!strcmp(name, ".fake_shstrtab"))
117 PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB);
118
119 out->count++;
120}
121
122static void BITSFUNC(go)(void *addr, size_t len,
123 FILE *outfile, const char *name) 9 FILE *outfile, const char *name)
124{ 10{
125 int found_load = 0; 11 int found_load = 0;
126 unsigned long load_size = -1; /* Work around bogus warning */ 12 unsigned long load_size = -1; /* Work around bogus warning */
127 unsigned long data_size; 13 unsigned long mapping_size;
128 ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr; 14 ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
129 int i; 15 int i;
130 unsigned long j; 16 unsigned long j;
131 ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, 17 ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
132 *alt_sec = NULL; 18 *alt_sec = NULL;
133 ELF(Dyn) *dyn = 0, *dyn_end = 0; 19 ELF(Dyn) *dyn = 0, *dyn_end = 0;
134 const char *secstrings; 20 const char *secstrings;
135 uint64_t syms[NSYMS] = {}; 21 INT_BITS syms[NSYMS] = {};
136
137 struct BITSFUNC(fake_sections) fake_sections = {};
138 22
139 ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff)); 23 ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
140 24
141 /* Walk the segment table. */ 25 /* Walk the segment table. */
142 for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { 26 for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
@@ -154,14 +38,16 @@ static void BITSFUNC(go)(void *addr, size_t len,
154 load_size = GET_LE(&pt[i].p_memsz); 38 load_size = GET_LE(&pt[i].p_memsz);
155 found_load = 1; 39 found_load = 1;
156 } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) { 40 } else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
157 dyn = addr + GET_LE(&pt[i].p_offset); 41 dyn = raw_addr + GET_LE(&pt[i].p_offset);
158 dyn_end = addr + GET_LE(&pt[i].p_offset) + 42 dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
159 GET_LE(&pt[i].p_memsz); 43 GET_LE(&pt[i].p_memsz);
160 } 44 }
161 } 45 }
162 if (!found_load) 46 if (!found_load)
163 fail("no PT_LOAD seg\n"); 47 fail("no PT_LOAD seg\n");
164 data_size = (load_size + 4095) / 4096 * 4096; 48
49 if (stripped_len < load_size)
50 fail("stripped input is too short\n");
165 51
166 /* Walk the dynamic table */ 52 /* Walk the dynamic table */
167 for (i = 0; dyn + i < dyn_end && 53 for (i = 0; dyn + i < dyn_end &&
@@ -173,11 +59,11 @@ static void BITSFUNC(go)(void *addr, size_t len,
173 } 59 }
174 60
175 /* Walk the section table */ 61 /* Walk the section table */
176 secstrings_hdr = addr + GET_LE(&hdr->e_shoff) + 62 secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
177 GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); 63 GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
178 secstrings = addr + GET_LE(&secstrings_hdr->sh_offset); 64 secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
179 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { 65 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
180 ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) + 66 ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
181 GET_LE(&hdr->e_shentsize) * i; 67 GET_LE(&hdr->e_shentsize) * i;
182 if (GET_LE(&sh->sh_type) == SHT_SYMTAB) 68 if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
183 symtab_hdr = sh; 69 symtab_hdr = sh;
@@ -190,7 +76,7 @@ static void BITSFUNC(go)(void *addr, size_t len,
190 if (!symtab_hdr) 76 if (!symtab_hdr)
191 fail("no symbol table\n"); 77 fail("no symbol table\n");
192 78
193 strtab_hdr = addr + GET_LE(&hdr->e_shoff) + 79 strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
194 GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link); 80 GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
195 81
196 /* Walk the symbol table */ 82 /* Walk the symbol table */
@@ -198,9 +84,9 @@ static void BITSFUNC(go)(void *addr, size_t len,
198 i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); 84 i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
199 i++) { 85 i++) {
200 int k; 86 int k;
201 ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) + 87 ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
202 GET_LE(&symtab_hdr->sh_entsize) * i; 88 GET_LE(&symtab_hdr->sh_entsize) * i;
203 const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + 89 const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
204 GET_LE(&sym->st_name); 90 GET_LE(&sym->st_name);
205 91
206 for (k = 0; k < NSYMS; k++) { 92 for (k = 0; k < NSYMS; k++) {
@@ -209,51 +95,17 @@ static void BITSFUNC(go)(void *addr, size_t len,
209 fail("duplicate symbol %s\n", 95 fail("duplicate symbol %s\n",
210 required_syms[k].name); 96 required_syms[k].name);
211 } 97 }
98
99 /*
100 * Careful: we use negative addresses, but
101 * st_value is unsigned, so we rely
102 * on syms[k] being a signed type of the
103 * correct width.
104 */
212 syms[k] = GET_LE(&sym->st_value); 105 syms[k] = GET_LE(&sym->st_value);
213 } 106 }
214 } 107 }
215
216 if (!strcmp(name, "fake_shstrtab")) {
217 ELF(Shdr) *sh;
218
219 fake_sections.in_shstrndx = GET_LE(&sym->st_shndx);
220 fake_sections.shstrtab = addr + GET_LE(&sym->st_value);
221 fake_sections.shstrtab_len = GET_LE(&sym->st_size);
222 sh = addr + GET_LE(&hdr->e_shoff) +
223 GET_LE(&hdr->e_shentsize) *
224 fake_sections.in_shstrndx;
225 fake_sections.shstr_offset = GET_LE(&sym->st_value) -
226 GET_LE(&sh->sh_addr);
227 }
228 }
229
230 /* Build the output section table. */
231 if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] ||
232 !syms[sym_VDSO_FAKE_SECTION_TABLE_END])
233 fail("couldn't find fake section table\n");
234 if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
235 syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr)))
236 fail("fake section table size isn't a multiple of sizeof(Shdr)\n");
237 fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START];
238 fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START];
239 fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
240 syms[sym_VDSO_FAKE_SECTION_TABLE_START]) /
241 sizeof(ELF(Shdr));
242
243 BITSFUNC(init_sections)(&fake_sections);
244 for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
245 ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
246 GET_LE(&hdr->e_shentsize) * i;
247 BITSFUNC(copy_section)(&fake_sections, i, sh,
248 secstrings + GET_LE(&sh->sh_name));
249 } 108 }
250 if (!fake_sections.out_shstrndx)
251 fail("didn't generate shstrndx?!?\n");
252
253 PUT_LE(&hdr->e_shoff, fake_sections.table_offset);
254 PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr)));
255 PUT_LE(&hdr->e_shnum, fake_sections.count);
256 PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx);
257 109
258 /* Validate mapping addresses. */ 110 /* Validate mapping addresses. */
259 for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { 111 for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
@@ -263,21 +115,23 @@ static void BITSFUNC(go)(void *addr, size_t len,
263 if (syms[i] % 4096) 115 if (syms[i] % 4096)
264 fail("%s must be a multiple of 4096\n", 116 fail("%s must be a multiple of 4096\n",
265 required_syms[i].name); 117 required_syms[i].name);
266 if (syms[i] < data_size) 118 if (syms[sym_vvar_start] > syms[i] + 4096)
267 fail("%s must be after the text mapping\n", 119 fail("%s underruns begin_vvar\n",
268 required_syms[i].name); 120 required_syms[i].name);
269 if (syms[sym_end_mapping] < syms[i] + 4096) 121 if (syms[i] + 4096 > 0)
270 fail("%s overruns end_mapping\n", 122 fail("%s is on the wrong side of the vdso text\n",
271 required_syms[i].name); 123 required_syms[i].name);
272 } 124 }
273 if (syms[sym_end_mapping] % 4096) 125 if (syms[sym_vvar_start] % 4096)
274 fail("end_mapping must be a multiple of 4096\n"); 126 fail("vvar_begin must be a multiple of 4096\n");
275 127
276 if (!name) { 128 if (!name) {
277 fwrite(addr, load_size, 1, outfile); 129 fwrite(stripped_addr, stripped_len, 1, outfile);
278 return; 130 return;
279 } 131 }
280 132
133 mapping_size = (stripped_len + 4095) / 4096 * 4096;
134
281 fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); 135 fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
282 fprintf(outfile, "#include <linux/linkage.h>\n"); 136 fprintf(outfile, "#include <linux/linkage.h>\n");
283 fprintf(outfile, "#include <asm/page_types.h>\n"); 137 fprintf(outfile, "#include <asm/page_types.h>\n");
@@ -285,20 +139,21 @@ static void BITSFUNC(go)(void *addr, size_t len,
285 fprintf(outfile, "\n"); 139 fprintf(outfile, "\n");
286 fprintf(outfile, 140 fprintf(outfile,
287 "static unsigned char raw_data[%lu] __page_aligned_data = {", 141 "static unsigned char raw_data[%lu] __page_aligned_data = {",
288 data_size); 142 mapping_size);
289 for (j = 0; j < load_size; j++) { 143 for (j = 0; j < stripped_len; j++) {
290 if (j % 10 == 0) 144 if (j % 10 == 0)
291 fprintf(outfile, "\n\t"); 145 fprintf(outfile, "\n\t");
292 fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]); 146 fprintf(outfile, "0x%02X, ",
147 (int)((unsigned char *)stripped_addr)[j]);
293 } 148 }
294 fprintf(outfile, "\n};\n\n"); 149 fprintf(outfile, "\n};\n\n");
295 150
296 fprintf(outfile, "static struct page *pages[%lu];\n\n", 151 fprintf(outfile, "static struct page *pages[%lu];\n\n",
297 data_size / 4096); 152 mapping_size / 4096);
298 153
299 fprintf(outfile, "const struct vdso_image %s = {\n", name); 154 fprintf(outfile, "const struct vdso_image %s = {\n", name);
300 fprintf(outfile, "\t.data = raw_data,\n"); 155 fprintf(outfile, "\t.data = raw_data,\n");
301 fprintf(outfile, "\t.size = %lu,\n", data_size); 156 fprintf(outfile, "\t.size = %lu,\n", mapping_size);
302 fprintf(outfile, "\t.text_mapping = {\n"); 157 fprintf(outfile, "\t.text_mapping = {\n");
303 fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); 158 fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
304 fprintf(outfile, "\t\t.pages = pages,\n"); 159 fprintf(outfile, "\t\t.pages = pages,\n");
@@ -311,8 +166,8 @@ static void BITSFUNC(go)(void *addr, size_t len,
311 } 166 }
312 for (i = 0; i < NSYMS; i++) { 167 for (i = 0; i < NSYMS; i++) {
313 if (required_syms[i].export && syms[i]) 168 if (required_syms[i].export && syms[i])
314 fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n", 169 fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
315 required_syms[i].name, syms[i]); 170 required_syms[i].name, (int64_t)syms[i]);
316 } 171 }
317 fprintf(outfile, "};\n"); 172 fprintf(outfile, "};\n");
318} 173}
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 5a5176de8d0a..970463b566cf 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -93,7 +93,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
93{ 93{
94 struct mm_struct *mm = current->mm; 94 struct mm_struct *mm = current->mm;
95 struct vm_area_struct *vma; 95 struct vm_area_struct *vma;
96 unsigned long addr; 96 unsigned long addr, text_start;
97 int ret = 0; 97 int ret = 0;
98 static struct page *no_pages[] = {NULL}; 98 static struct page *no_pages[] = {NULL};
99 static struct vm_special_mapping vvar_mapping = { 99 static struct vm_special_mapping vvar_mapping = {
@@ -103,26 +103,28 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
103 103
104 if (calculate_addr) { 104 if (calculate_addr) {
105 addr = vdso_addr(current->mm->start_stack, 105 addr = vdso_addr(current->mm->start_stack,
106 image->sym_end_mapping); 106 image->size - image->sym_vvar_start);
107 } else { 107 } else {
108 addr = 0; 108 addr = 0;
109 } 109 }
110 110
111 down_write(&mm->mmap_sem); 111 down_write(&mm->mmap_sem);
112 112
113 addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0); 113 addr = get_unmapped_area(NULL, addr,
114 image->size - image->sym_vvar_start, 0, 0);
114 if (IS_ERR_VALUE(addr)) { 115 if (IS_ERR_VALUE(addr)) {
115 ret = addr; 116 ret = addr;
116 goto up_fail; 117 goto up_fail;
117 } 118 }
118 119
119 current->mm->context.vdso = (void __user *)addr; 120 text_start = addr - image->sym_vvar_start;
121 current->mm->context.vdso = (void __user *)text_start;
120 122
121 /* 123 /*
122 * MAYWRITE to allow gdb to COW and set breakpoints 124 * MAYWRITE to allow gdb to COW and set breakpoints
123 */ 125 */
124 vma = _install_special_mapping(mm, 126 vma = _install_special_mapping(mm,
125 addr, 127 text_start,
126 image->size, 128 image->size,
127 VM_READ|VM_EXEC| 129 VM_READ|VM_EXEC|
128 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 130 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
@@ -134,9 +136,9 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
134 } 136 }
135 137
136 vma = _install_special_mapping(mm, 138 vma = _install_special_mapping(mm,
137 addr + image->size, 139 addr,
138 image->sym_end_mapping - image->size, 140 -image->sym_vvar_start,
139 VM_READ, 141 VM_READ|VM_MAYREAD,
140 &vvar_mapping); 142 &vvar_mapping);
141 143
142 if (IS_ERR(vma)) { 144 if (IS_ERR(vma)) {
@@ -146,7 +148,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
146 148
147 if (image->sym_vvar_page) 149 if (image->sym_vvar_page)
148 ret = remap_pfn_range(vma, 150 ret = remap_pfn_range(vma,
149 addr + image->sym_vvar_page, 151 text_start + image->sym_vvar_page,
150 __pa_symbol(&__vvar_page) >> PAGE_SHIFT, 152 __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
151 PAGE_SIZE, 153 PAGE_SIZE,
152 PAGE_READONLY); 154 PAGE_READONLY);
@@ -157,7 +159,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
157#ifdef CONFIG_HPET_TIMER 159#ifdef CONFIG_HPET_TIMER
158 if (hpet_address && image->sym_hpet_page) { 160 if (hpet_address && image->sym_hpet_page) {
159 ret = io_remap_pfn_range(vma, 161 ret = io_remap_pfn_range(vma,
160 addr + image->sym_hpet_page, 162 text_start + image->sym_hpet_page,
161 hpet_address >> PAGE_SHIFT, 163 hpet_address >> PAGE_SHIFT,
162 PAGE_SIZE, 164 PAGE_SIZE,
163 pgprot_noncached(PAGE_READONLY)); 165 pgprot_noncached(PAGE_READONLY));