-rw-r--r--  Documentation/x86/entry_64.txt   |   7
-rw-r--r--  MAINTAINERS                      |   7
-rw-r--r--  arch/x86/include/asm/dma.h       |   2
-rw-r--r--  arch/x86/include/asm/segment.h   |  30
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c |   4
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c |   4
-rw-r--r--  arch/x86/kernel/e820.c           |   4
-rw-r--r--  arch/x86/kernel/process_64.c     | 101
-rw-r--r--  arch/x86/kernel/tls.c            |  45
-rw-r--r--  arch/x86/mm/init.c               |   4
10 files changed, 152 insertions(+), 56 deletions(-)
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt
index bc7226ef5055..4a1c5c2dc5a9 100644
--- a/Documentation/x86/entry_64.txt
+++ b/Documentation/x86/entry_64.txt
@@ -7,9 +7,12 @@ http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
 The x86 architecture has quite a few different ways to jump into
 kernel code. Most of these entry points are registered in
 arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
-and arch/x86/ia32/ia32entry.S.
+for 64-bit, arch/x86/kernel/entry_32.S for 32-bit and finally
+arch/x86/ia32/ia32entry.S which implements the 32-bit compatibility
+syscall entry points and thus provides for 32-bit processes the
+ability to execute syscalls when running on 64-bit kernels.
 
-The IDT vector assignments are listed in arch/x86/include/irq_vectors.h.
+The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
 
 Some of these entries are:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index f273cb58d2e8..9ef141a913cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10485,6 +10485,13 @@ L: linux-edac@vger.kernel.org
 S:      Maintained
 F:      arch/x86/kernel/cpu/mcheck/*
 
+X86 VDSO
+M:      Andy Lutomirski <luto@amacapital.net>
+L:      linux-kernel@vger.kernel.org
+T:      git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
+S:      Maintained
+F:      arch/x86/vdso/
+
 XC2028/3028 TUNER DRIVER
 M:      Mauro Carvalho Chehab <mchehab@osg.samsung.com>
 L:      linux-media@vger.kernel.org
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 0bdb0c54d9a1..fe884e18fa6e 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -70,7 +70,7 @@
 #define MAX_DMA_CHANNELS        8
 
 /* 16MB ISA DMA zone */
-#define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
+#define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)
 
 /* 4GB broken PCI/AGP hardware bus master zone */
 #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
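
The UL suffix is the point of this hunk: without it, MAX_DMA_PFN has type
int while the PFN variables it is compared against are unsigned long, which
is why e820.c (below) previously had to use the casting min_t() form. A
standalone userspace sketch, not part of the patch, showing the type
difference:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define MAX_DMA_PFN_OLD ((16 * 1024 * 1024) >> PAGE_SHIFT)   /* type: int */
    #define MAX_DMA_PFN_NEW ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* type: unsigned long */

    int main(void)
    {
            unsigned long start_pfn = 0x5000;

            /* On LP64, the int-typed constant is 4 bytes wide, the UL one 8. */
            printf("old: %zu bytes, new: %zu bytes\n",
                   sizeof(MAX_DMA_PFN_OLD), sizeof(MAX_DMA_PFN_NEW));

            /* With matching types, a strict min() can compare directly. */
            printf("clamped: 0x%lx\n",
                   start_pfn < MAX_DMA_PFN_NEW ? start_pfn : MAX_DMA_PFN_NEW);
            return 0;
    }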
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 6f1c3a8a33ab..db257a58571f 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -23,6 +23,15 @@
 #define GDT_ENTRY_BOOT_TSS      (GDT_ENTRY_BOOT_CS + 2)
 #define __BOOT_TSS              (GDT_ENTRY_BOOT_TSS * 8)
 
+#define SEGMENT_RPL_MASK        0x3 /*
+                                     * Bottom two bits of selector give the ring
+                                     * privilege level
+                                     */
+#define SEGMENT_TI_MASK         0x4 /* Bit 2 is table indicator (LDT/GDT) */
+#define USER_RPL                0x3 /* User mode is privilege level 3 */
+#define SEGMENT_LDT             0x4 /* LDT segment has TI set... */
+#define SEGMENT_GDT             0x0 /* ... GDT has it cleared */
+
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
@@ -125,16 +134,6 @@
 #define PNP_TS1   (GDT_ENTRY_PNPBIOS_TS1 * 8)   /* transfer data segment */
 #define PNP_TS2   (GDT_ENTRY_PNPBIOS_TS2 * 8)   /* another data segment */
 
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK        0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK         0x4
-
-/* User mode is privilege level 3 */
-#define USER_RPL                0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT             0x4
-#define SEGMENT_GDT             0x0
 
 /*
  * Matching rules for certain types of segments.
@@ -192,17 +191,6 @@
 #define get_kernel_rpl()  0
 #endif
 
-/* User mode is privilege level 3 */
-#define USER_RPL                0x3
-/* LDT segment has TI set, GDT has it cleared */
-#define SEGMENT_LDT             0x4
-#define SEGMENT_GDT             0x0
-
-/* Bottom two bits of selector give the ring privilege level */
-#define SEGMENT_RPL_MASK        0x3
-/* Bit 2 is table indicator (LDT/GDT) */
-#define SEGMENT_TI_MASK         0x4
-
 #define IDT_ENTRIES 256
 #define NUM_EXCEPTION_VECTORS 32
 /* Bitmask of exception vectors which push an error code on the stack */
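
For reference, the consolidated constants decode an x86 segment selector:
the low two bits are the requested privilege level, bit 2 selects GDT
versus LDT, and the remaining bits index the descriptor table. A userspace
sketch, not part of the patch; the example selector values assume the
64-bit Linux GDT layout:

    #include <stdio.h>

    #define SEGMENT_RPL_MASK 0x3
    #define SEGMENT_TI_MASK  0x4
    #define USER_RPL         0x3
    #define SEGMENT_LDT      0x4
    #define SEGMENT_GDT      0x0

    static void decode_selector(unsigned short sel)
    {
            unsigned int rpl = sel & SEGMENT_RPL_MASK;

            printf("0x%04x: index=%u, table=%s, rpl=%u%s\n",
                   sel, (unsigned int)(sel >> 3),
                   (sel & SEGMENT_TI_MASK) == SEGMENT_LDT ? "LDT" : "GDT",
                   rpl, rpl == USER_RPL ? " (user)" : "");
    }

    int main(void)
    {
            decode_selector(0x33);  /* __USER_CS on 64-bit Linux: GDT index 6, RPL 3 */
            decode_selector(0x10);  /* __KERNEL_CS: GDT index 2, RPL 0 */
            return 0;
    }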
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index d67c4be3e8b1..3b3b9d33ac1d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,3 +1,7 @@
+#ifndef __LINUX_KBUILD_H
+# error "Please do not build this file directly, build asm-offsets.c instead"
+#endif
+
 #include <asm/ucontext.h>
 
 #include <linux/lguest.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4f9359f36bb7..fdcbb4d27c9f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,3 +1,7 @@
+#ifndef __LINUX_KBUILD_H
+# error "Please do not build this file directly, build asm-offsets.c instead"
+#endif
+
 #include <asm/ia32.h>
 
 #define __SYSCALL_64(nr, sym, compat) [nr] = 1,
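
Both guards test __LINUX_KBUILD_H, the include guard of <linux/kbuild.h>,
which is only defined when these fragments are pulled in through
asm-offsets.c; building them directly now fails loudly. Roughly, the
pattern they feed looks like the sketch below, a simplified re-creation
with hypothetical structure and symbol names rather than the real macro
from include/linux/kbuild.h:

    /*
     * Emits "->SYM value" markers into the generated assembly; a kbuild
     * sed script later turns each marker into a #define in asm-offsets.h.
     */
    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

    /* Hypothetical stand-ins for real kernel structures and symbols. */
    struct thread_stub { unsigned long sp0; unsigned long fs; };

    void common(void)
    {
            DEFINE(THREAD_SP0, __builtin_offsetof(struct thread_stub, sp0));
            DEFINE(THREAD_FS,  __builtin_offsetof(struct thread_stub, fs));
    }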
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 49f886481615..dd2f07ae9d0c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1114,8 +1114,8 @@ void __init memblock_find_dma_reserve(void)
 	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
 	 */
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
-		start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
-		end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
+		start_pfn = min(start_pfn, MAX_DMA_PFN);
+		end_pfn = min(end_pfn, MAX_DMA_PFN);
 		nr_pages += end_pfn - start_pfn;
 	}
 
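
With MAX_DMA_PFN now unsigned long (see the dma.h hunk above), the plain
min() type-checks cleanly. Simplified re-creations of the two macros,
shown here for illustration; the real ones live in include/linux/kernel.h:

    #include <stdio.h>

    /*
     * min() warns at build time if the two sides have different types
     * (the pointer comparison below is the trick); min_t() instead
     * casts both sides to the named type.
     */
    #define min(x, y) ({                            \
            typeof(x) _min1 = (x);                  \
            typeof(y) _min2 = (y);                  \
            (void) (&_min1 == &_min2);              \
            _min1 < _min2 ? _min1 : _min2; })

    #define min_t(type, x, y) ({                    \
            type _min1 = (type)(x);                 \
            type _min2 = (type)(y);                 \
            _min1 < _min2 ? _min1 : _min2; })

    int main(void)
    {
            unsigned long pfn = 0x5000, cap = 0x1000;

            printf("min:   0x%lx\n", min(pfn, cap));                 /* types match */
            printf("min_t: 0x%lx\n", min_t(unsigned long, pfn, 42)); /* 42 is cast */
            return 0;
    }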
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3ed4a68d4013..5a2c02913af3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-	/*
-	 * Reload esp0, LDT and the page table pointer:
-	 */
+	/* Reload esp0 and ss1. */
 	load_sp0(tss, next);
 
-	/*
-	 * Switch DS and ES.
-	 * This won't pick up thread selector changes, but I guess that is ok.
-	 */
-	savesegment(es, prev->es);
-	if (unlikely(next->es | prev->es))
-		loadsegment(es, next->es);
-
-	savesegment(ds, prev->ds);
-	if (unlikely(next->ds | prev->ds))
-		loadsegment(ds, next->ds);
-
-
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
 	 *
@@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	savesegment(fs, fsindex);
 	savesegment(gs, gsindex);
 
+	/*
+	 * Load TLS before restoring any segments so that segment loads
+	 * reference the correct GDT entries.
+	 */
 	load_TLS(next, cpu);
 
 	/*
-	 * Leave lazy mode, flushing any hypercalls made here.
-	 * This must be done before restoring TLS segments so
-	 * the GDT and LDT are properly updated, and must be
-	 * done before math_state_restore, so the TS bit is up
-	 * to date.
+	 * Leave lazy mode, flushing any hypercalls made here.  This
+	 * must be done after loading TLS entries in the GDT but before
+	 * loading segments that might reference them, and it must
+	 * be done before math_state_restore, so the TS bit is up to
+	 * date.
 	 */
 	arch_end_context_switch(next_p);
 
+	/* Switch DS and ES.
+	 *
+	 * Reading them only returns the selectors, but writing them (if
+	 * nonzero) loads the full descriptor from the GDT or LDT.  The
+	 * LDT for next is loaded in switch_mm, and the GDT is loaded
+	 * above.
+	 *
+	 * We therefore need to write new values to the segment
+	 * registers on every context switch unless both the new and old
+	 * values are zero.
+	 *
+	 * Note that we don't need to do anything for CS and SS, as
+	 * those are saved and restored as part of pt_regs.
+	 */
+	savesegment(es, prev->es);
+	if (unlikely(next->es | prev->es))
+		loadsegment(es, next->es);
+
+	savesegment(ds, prev->ds);
+	if (unlikely(next->ds | prev->ds))
+		loadsegment(ds, next->ds);
+
 	/*
 	 * Switch FS and GS.
 	 *
-	 * Segment register != 0 always requires a reload. Also
-	 * reload when it has changed. When prev process used 64bit
-	 * base always reload to avoid an information leak.
+	 * These are even more complicated than DS and ES: they have
+	 * 64-bit bases that are controlled by arch_prctl.  Those bases
+	 * only differ from the values in the GDT or LDT if the selector
+	 * is 0.
+	 *
+	 * Loading the segment register resets the hidden base part of
+	 * the register to 0 or the value from the GDT / LDT.  If the
+	 * next base address is zero, writing 0 to the segment register
+	 * is much faster than using wrmsr to explicitly zero the base.
+	 *
+	 * The thread_struct.fs and thread_struct.gs values are 0
+	 * if the fs and gs bases respectively are not overridden
+	 * from the values implied by fsindex and gsindex.  They
+	 * are nonzero, and store the nonzero base addresses, if
+	 * the bases are overridden.
+	 *
+	 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
+	 * be impossible.
+	 *
+	 * Therefore we need to reload the segment registers if either
+	 * the old or new selector is nonzero, and we need to override
+	 * the base address if the next thread expects it to be overridden.
+	 *
+	 * This code is unnecessarily slow in the case where the old and
+	 * new indexes are zero and the new base is nonzero -- it will
+	 * unnecessarily write 0 to the selector before writing the new
+	 * base address.
+	 *
+	 * Note: This all depends on arch_prctl being the only way that
+	 * user code can override the segment base.  Once wrfsbase and
+	 * wrgsbase are enabled, most of this code will need to change.
 	 */
 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
 		loadsegment(fs, next->fsindex);
+
 		/*
-		 * Check if the user used a selector != 0; if yes
-		 * clear 64bit base, since overloaded base is always
-		 * mapped to the Null selector
+		 * If user code wrote a nonzero value to FS, then it also
+		 * cleared the overridden base address.
+		 *
+		 * XXX: if user code wrote 0 to FS and cleared the base
+		 * address itself, we won't notice and we'll incorrectly
+		 * restore the prior base address next time we reschedule
+		 * the process.
 		 */
 		if (fsindex)
 			prev->fs = 0;
 	}
-	/* when next process has a 64bit base use it */
 	if (next->fs)
 		wrmsrl(MSR_FS_BASE, next->fs);
 	prev->fsindex = fsindex;
 
 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
 		load_gs_index(next->gsindex);
+
+		/* This works (and fails) the same way as fsindex above. */
 		if (gsindex)
 			prev->gs = 0;
 	}
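
The new comment's central claim, that an arch_prctl()-set base is
independent of the selector, can be observed from userspace. A sketch,
not part of the patch, for 64-bit Linux only; it uses the GS pair so as
not to disturb libc's FS-based TLS:

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <asm/prctl.h>          /* ARCH_SET_GS, ARCH_GET_GS */

    int main(void)
    {
            static unsigned long slot = 0xdeadbeef;
            unsigned long base = 0;
            unsigned short sel;

            syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)&slot);
            syscall(SYS_arch_prctl, ARCH_GET_GS, &base);
            asm volatile("mov %%gs, %0" : "=r" (sel));

            /* Expect: selector 0, but a nonzero 64-bit base. */
            printf("gs selector = %#hx, gs base = %#lx\n", sel, base);
            return 0;
    }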
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index f7fec09e3e3a..3e551eee87b9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -27,6 +27,43 @@ static int get_free_idx(void)
 	return -ESRCH;
 }
 
+static bool tls_desc_okay(const struct user_desc *info)
+{
+	if (LDT_empty(info))
+		return true;
+
+	/*
+	 * espfix is required for 16-bit data segments, but espfix
+	 * only works for LDT segments.
+	 */
+	if (!info->seg_32bit)
+		return false;
+
+	/* Only allow data segments in the TLS array. */
+	if (info->contents > 1)
+		return false;
+
+	/*
+	 * Non-present segments with DPL 3 present an interesting attack
+	 * surface.  The kernel should handle such segments correctly,
+	 * but TLS is very difficult to protect in a sandbox, so prevent
+	 * such segments from being created.
+	 *
+	 * If userspace needs to remove a TLS entry, it can still delete
+	 * it outright.
+	 */
+	if (info->seg_not_present)
+		return false;
+
+#ifdef CONFIG_X86_64
+	/* The L bit makes no sense for data. */
+	if (info->lm)
+		return false;
+#endif
+
+	return true;
+}
+
 static void set_tls_desc(struct task_struct *p, int idx,
 			 const struct user_desc *info, int n)
 {
@@ -66,6 +103,9 @@ int do_set_thread_area(struct task_struct *p, int idx,
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
 
+	if (!tls_desc_okay(&info))
+		return -EINVAL;
+
 	if (idx == -1)
 		idx = info.entry_number;
 
@@ -192,6 +232,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
 {
 	struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
 	const struct user_desc *info;
+	int i;
 
 	if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
 	    (pos % sizeof(struct user_desc)) != 0 ||
@@ -205,6 +246,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
 	else
 		info = infobuf;
 
+	for (i = 0; i < count / sizeof(struct user_desc); i++)
+		if (!tls_desc_okay(info + i))
+			return -EINVAL;
+
 	set_tls_desc(target,
 		     GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
 		     info, count / sizeof(struct user_desc));
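
Seen from userspace, tls_desc_okay() means a 16-bit TLS segment that older
kernels would install is now rejected with -EINVAL. A sketch, not part of
the patch, assuming the set_thread_area syscall, which is wired up on both
32-bit and 64-bit x86:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <asm/ldt.h>            /* struct user_desc */

    int main(void)
    {
            struct user_desc desc;

            memset(&desc, 0, sizeof(desc));
            desc.entry_number = -1;         /* let the kernel pick a slot */
            desc.base_addr    = 0x1000;
            desc.limit        = 0xffff;
            desc.seg_32bit    = 0;          /* 16-bit data segment */
            desc.contents     = 0;          /* data */

            if (syscall(SYS_set_thread_area, &desc) < 0)
                    printf("rejected as expected: %s\n", strerror(errno));
            else
                    printf("installed in entry %u (pre-fix kernel)\n",
                           desc.entry_number);
            return 0;
    }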
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82b41d56bb98..a97ee0801475 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -703,10 +703,10 @@ void __init zone_sizes_init(void)
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 
 #ifdef CONFIG_ZONE_DMA
-	max_zone_pfns[ZONE_DMA]		= MAX_DMA_PFN;
+	max_zone_pfns[ZONE_DMA]		= min(MAX_DMA_PFN, max_low_pfn);
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32]	= MAX_DMA32_PFN;
+	max_zone_pfns[ZONE_DMA32]	= min(MAX_DMA32_PFN, max_low_pfn);
 #endif
 	max_zone_pfns[ZONE_NORMAL]	= max_low_pfn;
 #ifdef CONFIG_HIGHMEM
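
The clamp matters on machines with less RAM than a zone's static ceiling,
where the old code reported zone bounds past the end of memory. A worked
example, not part of the patch, for a hypothetical 512 MiB machine with
4 KiB pages; min() here is a stand-in for the kernel macro:

    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)        /* 0x1000 */
    #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)  /* 0x100000 */
    #define min(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            /* 512 MiB of RAM => highest usable PFN is 0x20000. */
            unsigned long max_low_pfn = (512UL * 1024 * 1024) >> PAGE_SHIFT;

            printf("ZONE_DMA   top: %#lx\n", min(MAX_DMA_PFN, max_low_pfn));
            printf("ZONE_DMA32 top: %#lx\n", min(MAX_DMA32_PFN, max_low_pfn));
            return 0;
    }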