aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-10 15:10:24 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-10 15:10:24 -0500
commit: b6444bd0a18eb47343e16749ce80a6ebd521f124 (patch)
tree: 989881a237552dbe3fb36df45b4eda6dbd5fc09f /arch/x86
parent: 9d0cf6f56454c8a71e0aa2c3b9c6cbe470eb2788 (diff)
parent: 97b67ae559947f1e208439a1bf6a734da3087006 (diff)
Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot and percpu updates from Ingo Molnar:
 "This tree contains a bootable images documentation update plus three
  slightly misplaced x86/asm percpu changes/optimizations"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86-64: Use RIP-relative addressing for most per-CPU accesses
  x86-64: Handle PC-relative relocations on per-CPU data
  x86: Convert a few more per-CPU items to read-mostly ones
  x86, boot: Document intermediates more clearly
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/boot/compressed/Makefile 12
-rw-r--r-- arch/x86/boot/compressed/misc.c 14
-rw-r--r-- arch/x86/include/asm/percpu.h 61
-rw-r--r-- arch/x86/include/asm/processor.h 4
-rw-r--r-- arch/x86/kernel/setup_percpu.c 2
-rw-r--r-- arch/x86/kernel/smpboot.c 2
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S 2
-rw-r--r-- arch/x86/tools/relocs.c 36
8 files changed, 103 insertions, 30 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 45abc363dd3e..65516ab0cabe 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -3,6 +3,18 @@
3# 3#
4# create a compressed vmlinux image from the original vmlinux 4# create a compressed vmlinux image from the original vmlinux
5# 5#
6# vmlinuz is:
7# decompression code (*.o)
8# asm globals (piggy.S), including:
9# vmlinux.bin.(gz|bz2|lzma|...)
10#
11# vmlinux.bin is:
12# vmlinux stripped of debugging and comments
13# vmlinux.bin.all is:
14# vmlinux.bin + vmlinux.relocs
15# vmlinux.bin.(gz|bz2|lzma|...) is:
16# (see scripts/Makefile.lib size_append)
17# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
6 18
7targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ 19targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
8 vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 20 vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 30dd59a9f0b4..dcc1c536cc21 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -260,7 +260,7 @@ static void handle_relocations(void *output, unsigned long output_len)
260 260
261 /* 261 /*
262 * Process relocations: 32 bit relocations first then 64 bit after. 262 * Process relocations: 32 bit relocations first then 64 bit after.
263 * Two sets of binary relocations are added to the end of the kernel 263 * Three sets of binary relocations are added to the end of the kernel
264 * before compression. Each relocation table entry is the kernel 264 * before compression. Each relocation table entry is the kernel
265 * address of the location which needs to be updated stored as a 265 * address of the location which needs to be updated stored as a
266 * 32-bit value which is sign extended to 64 bits. 266 * 32-bit value which is sign extended to 64 bits.
@@ -270,6 +270,8 @@ static void handle_relocations(void *output, unsigned long output_len)
270 * kernel bits... 270 * kernel bits...
271 * 0 - zero terminator for 64 bit relocations 271 * 0 - zero terminator for 64 bit relocations
272 * 64 bit relocation repeated 272 * 64 bit relocation repeated
273 * 0 - zero terminator for inverse 32 bit relocations
274 * 32 bit inverse relocation repeated
273 * 0 - zero terminator for 32 bit relocations 275 * 0 - zero terminator for 32 bit relocations
274 * 32 bit relocation repeated 276 * 32 bit relocation repeated
275 * 277 *
@@ -286,6 +288,16 @@ static void handle_relocations(void *output, unsigned long output_len)
286 *(uint32_t *)ptr += delta; 288 *(uint32_t *)ptr += delta;
287 } 289 }
288#ifdef CONFIG_X86_64 290#ifdef CONFIG_X86_64
291 while (*--reloc) {
292 long extended = *reloc;
293 extended += map;
294
295 ptr = (unsigned long)extended;
296 if (ptr < min_addr || ptr > max_addr)
297 error("inverse 32-bit relocation outside of kernel!\n");
298
299 *(int32_t *)ptr -= delta;
300 }
289 for (reloc--; *reloc; reloc--) { 301 for (reloc--; *reloc; reloc--) {
290 long extended = *reloc; 302 long extended = *reloc;
291 extended += map; 303 extended += map;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index fd472181a1d0..e0ba66ca68c6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -64,7 +64,7 @@
64#define __percpu_prefix "" 64#define __percpu_prefix ""
65#endif 65#endif
66 66
67#define __percpu_arg(x) __percpu_prefix "%P" #x 67#define __percpu_arg(x) __percpu_prefix "%" #x
68 68
69/* 69/*
70 * Initialized pointers to per-cpu variables needed for the boot 70 * Initialized pointers to per-cpu variables needed for the boot
@@ -179,29 +179,58 @@ do { \
179 } \ 179 } \
180} while (0) 180} while (0)
181 181
182#define percpu_from_op(op, var, constraint) \ 182#define percpu_from_op(op, var) \
183({ \ 183({ \
184 typeof(var) pfo_ret__; \ 184 typeof(var) pfo_ret__; \
185 switch (sizeof(var)) { \ 185 switch (sizeof(var)) { \
186 case 1: \ 186 case 1: \
187 asm(op "b "__percpu_arg(1)",%0" \ 187 asm(op "b "__percpu_arg(1)",%0" \
188 : "=q" (pfo_ret__) \ 188 : "=q" (pfo_ret__) \
189 : constraint); \ 189 : "m" (var)); \
190 break; \ 190 break; \
191 case 2: \ 191 case 2: \
192 asm(op "w "__percpu_arg(1)",%0" \ 192 asm(op "w "__percpu_arg(1)",%0" \
193 : "=r" (pfo_ret__) \ 193 : "=r" (pfo_ret__) \
194 : constraint); \ 194 : "m" (var)); \
195 break; \ 195 break; \
196 case 4: \ 196 case 4: \
197 asm(op "l "__percpu_arg(1)",%0" \ 197 asm(op "l "__percpu_arg(1)",%0" \
198 : "=r" (pfo_ret__) \ 198 : "=r" (pfo_ret__) \
199 : constraint); \ 199 : "m" (var)); \
200 break; \ 200 break; \
201 case 8: \ 201 case 8: \
202 asm(op "q "__percpu_arg(1)",%0" \ 202 asm(op "q "__percpu_arg(1)",%0" \
203 : "=r" (pfo_ret__) \ 203 : "=r" (pfo_ret__) \
204 : constraint); \ 204 : "m" (var)); \
205 break; \
206 default: __bad_percpu_size(); \
207 } \
208 pfo_ret__; \
209})
210
211#define percpu_stable_op(op, var) \
212({ \
213 typeof(var) pfo_ret__; \
214 switch (sizeof(var)) { \
215 case 1: \
216 asm(op "b "__percpu_arg(P1)",%0" \
217 : "=q" (pfo_ret__) \
218 : "p" (&(var))); \
219 break; \
220 case 2: \
221 asm(op "w "__percpu_arg(P1)",%0" \
222 : "=r" (pfo_ret__) \
223 : "p" (&(var))); \
224 break; \
225 case 4: \
226 asm(op "l "__percpu_arg(P1)",%0" \
227 : "=r" (pfo_ret__) \
228 : "p" (&(var))); \
229 break; \
230 case 8: \
231 asm(op "q "__percpu_arg(P1)",%0" \
232 : "=r" (pfo_ret__) \
233 : "p" (&(var))); \
205 break; \ 234 break; \
206 default: __bad_percpu_size(); \ 235 default: __bad_percpu_size(); \
207 } \ 236 } \
@@ -359,11 +388,11 @@ do { \
359 * per-thread variables implemented as per-cpu variables and thus 388 * per-thread variables implemented as per-cpu variables and thus
360 * stable for the duration of the respective task. 389 * stable for the duration of the respective task.
361 */ 390 */
362#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) 391#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
363 392
364#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 393#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
365#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 394#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
366#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 395#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
367 396
368#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 397#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
369#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 398#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -381,9 +410,9 @@ do { \
381#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) 410#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
382#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) 411#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
383 412
384#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 413#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
385#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 414#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
386#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 415#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
387#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) 416#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
388#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) 417#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
389#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) 418#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -435,7 +464,7 @@ do { \
435 * 32 bit must fall back to generic operations. 464 * 32 bit must fall back to generic operations.
436 */ 465 */
437#ifdef CONFIG_X86_64 466#ifdef CONFIG_X86_64
438#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 467#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
439#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 468#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
440#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 469#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
441#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 470#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@@ -444,7 +473,7 @@ do { \
444#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) 473#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
445#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) 474#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
446 475
447#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 476#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
448#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 477#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
449#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 478#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
450#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 479#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@@ -522,7 +551,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
522#include <asm-generic/percpu.h> 551#include <asm-generic/percpu.h>
523 552
524/* We can use this directly for local CPU (faster). */ 553/* We can use this directly for local CPU (faster). */
525DECLARE_PER_CPU(unsigned long, this_cpu_off); 554DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
526 555
527#endif /* !__ASSEMBLY__ */ 556#endif /* !__ASSEMBLY__ */
528 557
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9617a1716813..25b8de0f21c0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -127,7 +127,7 @@ struct cpuinfo_x86 {
127 /* Index into per_cpu list: */ 127 /* Index into per_cpu list: */
128 u16 cpu_index; 128 u16 cpu_index;
129 u32 microcode; 129 u32 microcode;
130} __attribute__((__aligned__(SMP_CACHE_BYTES))); 130};
131 131
132#define X86_VENDOR_INTEL 0 132#define X86_VENDOR_INTEL 0
133#define X86_VENDOR_CYRIX 1 133#define X86_VENDOR_CYRIX 1
@@ -151,7 +151,7 @@ extern __u32 cpu_caps_cleared[NCAPINTS];
151extern __u32 cpu_caps_set[NCAPINTS]; 151extern __u32 cpu_caps_set[NCAPINTS];
152 152
153#ifdef CONFIG_SMP 153#ifdef CONFIG_SMP
154DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 154DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
155#define cpu_data(cpu) per_cpu(cpu_info, cpu) 155#define cpu_data(cpu) per_cpu(cpu_info, cpu)
156#else 156#else
157#define cpu_info boot_cpu_data 157#define cpu_info boot_cpu_data
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5cdff0357746..e4fcb87ba7a6 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -30,7 +30,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
30#define BOOT_PERCPU_OFFSET 0 30#define BOOT_PERCPU_OFFSET 0
31#endif 31#endif
32 32
33DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; 33DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
34EXPORT_PER_CPU_SYMBOL(this_cpu_off); 34EXPORT_PER_CPU_SYMBOL(this_cpu_off);
35 35
36unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { 36unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 668d8f2a8781..7a8f5845e8eb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -99,7 +99,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
99DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); 99DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
100 100
101/* Per CPU bogomips and other parameters */ 101/* Per CPU bogomips and other parameters */
102DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 102DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
103EXPORT_PER_CPU_SYMBOL(cpu_info); 103EXPORT_PER_CPU_SYMBOL(cpu_info);
104 104
105atomic_t init_deasserted; 105atomic_t init_deasserted;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 49edf2dd3613..00bf300fd846 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -186,6 +186,8 @@ SECTIONS
186 * start another segment - init. 186 * start another segment - init.
187 */ 187 */
188 PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) 188 PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
189 ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
190 "per-CPU data too large - increase CONFIG_PHYSICAL_START")
189#endif 191#endif
190 192
191 INIT_TEXT_SECTION(PAGE_SIZE) 193 INIT_TEXT_SECTION(PAGE_SIZE)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index a5efb21d5228..0c2fae8d929d 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -20,7 +20,10 @@ struct relocs {
20 20
21static struct relocs relocs16; 21static struct relocs relocs16;
22static struct relocs relocs32; 22static struct relocs relocs32;
23#if ELF_BITS == 64
24static struct relocs relocs32neg;
23static struct relocs relocs64; 25static struct relocs relocs64;
26#endif
24 27
25struct section { 28struct section {
26 Elf_Shdr shdr; 29 Elf_Shdr shdr;
@@ -762,11 +765,16 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
762 765
763 switch (r_type) { 766 switch (r_type) {
764 case R_X86_64_NONE: 767 case R_X86_64_NONE:
768 /* NONE can be ignored. */
769 break;
770
765 case R_X86_64_PC32: 771 case R_X86_64_PC32:
766 /* 772 /*
767 * NONE can be ignored and PC relative relocations don't 773 * PC relative relocations don't need to be adjusted unless
768 * need to be adjusted. 774 * referencing a percpu symbol.
769 */ 775 */
776 if (is_percpu_sym(sym, symname))
777 add_reloc(&relocs32neg, offset);
770 break; 778 break;
771 779
772 case R_X86_64_32: 780 case R_X86_64_32:
@@ -986,7 +994,10 @@ static void emit_relocs(int as_text, int use_real_mode)
986 /* Order the relocations for more efficient processing */ 994 /* Order the relocations for more efficient processing */
987 sort_relocs(&relocs16); 995 sort_relocs(&relocs16);
988 sort_relocs(&relocs32); 996 sort_relocs(&relocs32);
997#if ELF_BITS == 64
998 sort_relocs(&relocs32neg);
989 sort_relocs(&relocs64); 999 sort_relocs(&relocs64);
1000#endif
990 1001
991 /* Print the relocations */ 1002 /* Print the relocations */
992 if (as_text) { 1003 if (as_text) {
@@ -1007,14 +1018,21 @@ static void emit_relocs(int as_text, int use_real_mode)
1007 for (i = 0; i < relocs32.count; i++) 1018 for (i = 0; i < relocs32.count; i++)
1008 write_reloc(relocs32.offset[i], stdout); 1019 write_reloc(relocs32.offset[i], stdout);
1009 } else { 1020 } else {
1010 if (ELF_BITS == 64) { 1021#if ELF_BITS == 64
1011 /* Print a stop */ 1022 /* Print a stop */
1012 write_reloc(0, stdout); 1023 write_reloc(0, stdout);
1013 1024
1014 /* Now print each relocation */ 1025 /* Now print each relocation */
1015 for (i = 0; i < relocs64.count; i++) 1026 for (i = 0; i < relocs64.count; i++)
1016 write_reloc(relocs64.offset[i], stdout); 1027 write_reloc(relocs64.offset[i], stdout);
1017 } 1028
1029 /* Print a stop */
1030 write_reloc(0, stdout);
1031
1032 /* Now print each inverse 32-bit relocation */
1033 for (i = 0; i < relocs32neg.count; i++)
1034 write_reloc(relocs32neg.offset[i], stdout);
1035#endif
1018 1036
1019 /* Print a stop */ 1037 /* Print a stop */
1020 write_reloc(0, stdout); 1038 write_reloc(0, stdout);