aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/process_64.c101
1 files changed, 73 insertions, 28 deletions
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3ed4a68d4013..5a2c02913af3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
283 283
284 fpu = switch_fpu_prepare(prev_p, next_p, cpu); 284 fpu = switch_fpu_prepare(prev_p, next_p, cpu);
285 285
286 /* 286 /* Reload esp0 and ss1. */
287 * Reload esp0, LDT and the page table pointer:
288 */
289 load_sp0(tss, next); 287 load_sp0(tss, next);
290 288
291 /*
292 * Switch DS and ES.
293 * This won't pick up thread selector changes, but I guess that is ok.
294 */
295 savesegment(es, prev->es);
296 if (unlikely(next->es | prev->es))
297 loadsegment(es, next->es);
298
299 savesegment(ds, prev->ds);
300 if (unlikely(next->ds | prev->ds))
301 loadsegment(ds, next->ds);
302
303
304 /* We must save %fs and %gs before load_TLS() because 289 /* We must save %fs and %gs before load_TLS() because
305 * %fs and %gs may be cleared by load_TLS(). 290 * %fs and %gs may be cleared by load_TLS().
306 * 291 *
@@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
309 savesegment(fs, fsindex); 294 savesegment(fs, fsindex);
310 savesegment(gs, gsindex); 295 savesegment(gs, gsindex);
311 296
297 /*
298 * Load TLS before restoring any segments so that segment loads
299 * reference the correct GDT entries.
300 */
312 load_TLS(next, cpu); 301 load_TLS(next, cpu);
313 302
314 /* 303 /*
315 * Leave lazy mode, flushing any hypercalls made here. 304 * Leave lazy mode, flushing any hypercalls made here. This
316 * This must be done before restoring TLS segments so 305 * must be done after loading TLS entries in the GDT but before
317 * the GDT and LDT are properly updated, and must be 306 * loading segments that might reference them, and it must
318 * done before math_state_restore, so the TS bit is up 307 * be done before math_state_restore, so the TS bit is up to
319 * to date. 308 * date.
320 */ 309 */
321 arch_end_context_switch(next_p); 310 arch_end_context_switch(next_p);
322 311
312 /* Switch DS and ES.
313 *
314 * Reading them only returns the selectors, but writing them (if
315 * nonzero) loads the full descriptor from the GDT or LDT. The
316 * LDT for next is loaded in switch_mm, and the GDT is loaded
317 * above.
318 *
319 * We therefore need to write new values to the segment
320 * registers on every context switch unless both the new and old
321 * values are zero.
322 *
323 * Note that we don't need to do anything for CS and SS, as
324 * those are saved and restored as part of pt_regs.
325 */
326 savesegment(es, prev->es);
327 if (unlikely(next->es | prev->es))
328 loadsegment(es, next->es);
329
330 savesegment(ds, prev->ds);
331 if (unlikely(next->ds | prev->ds))
332 loadsegment(ds, next->ds);
333
323 /* 334 /*
324 * Switch FS and GS. 335 * Switch FS and GS.
325 * 336 *
326 * Segment register != 0 always requires a reload. Also 337 * These are even more complicated than DS and ES: they have
327 * reload when it has changed. When prev process used 64bit 338 * 64-bit bases that are controlled by arch_prctl. Those bases
328 * base always reload to avoid an information leak. 339 * only differ from the values in the GDT or LDT if the selector
340 * is 0.
341 *
342 * Loading the segment register resets the hidden base part of
343 * the register to 0 or the value from the GDT / LDT. If the
344 * next base address is zero, writing 0 to the segment register is
345 * much faster than using wrmsr to explicitly zero the base.
346 *
347 * The thread_struct.fs and thread_struct.gs values are 0
348 * if the fs and gs bases respectively are not overridden
349 * from the values implied by fsindex and gsindex. They
350 * are nonzero, and store the nonzero base addresses, if
351 * the bases are overridden.
352 *
353 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
354 * be impossible.
355 *
356 * Therefore we need to reload the segment registers if either
357 * the old or new selector is nonzero, and we need to override
358 * the base address if next thread expects it to be overridden.
359 *
360 * This code is unnecessarily slow in the case where the old and
361 * new indexes are zero and the new base is nonzero -- it will
362 * unnecessarily write 0 to the selector before writing the new
363 * base address.
364 *
365 * Note: This all depends on arch_prctl being the only way that
366 * user code can override the segment base. Once wrfsbase and
367 * wrgsbase are enabled, most of this code will need to change.
329 */ 368 */
330 if (unlikely(fsindex | next->fsindex | prev->fs)) { 369 if (unlikely(fsindex | next->fsindex | prev->fs)) {
331 loadsegment(fs, next->fsindex); 370 loadsegment(fs, next->fsindex);
371
332 /* 372 /*
333 * Check if the user used a selector != 0; if yes 373 * If user code wrote a nonzero value to FS, then it also
334 * clear 64bit base, since overloaded base is always 374 * cleared the overridden base address.
335 * mapped to the Null selector 375 *
376 * XXX: if user code wrote 0 to FS and cleared the base
377 * address itself, we won't notice and we'll incorrectly
378 * restore the prior base address next time we reschedule
379 * the process.
336 */ 380 */
337 if (fsindex) 381 if (fsindex)
338 prev->fs = 0; 382 prev->fs = 0;
339 } 383 }
340 /* when next process has a 64bit base use it */
341 if (next->fs) 384 if (next->fs)
342 wrmsrl(MSR_FS_BASE, next->fs); 385 wrmsrl(MSR_FS_BASE, next->fs);
343 prev->fsindex = fsindex; 386 prev->fsindex = fsindex;
344 387
345 if (unlikely(gsindex | next->gsindex | prev->gs)) { 388 if (unlikely(gsindex | next->gsindex | prev->gs)) {
346 load_gs_index(next->gsindex); 389 load_gs_index(next->gsindex);
390
391 /* This works (and fails) the same way as fsindex above. */
347 if (gsindex) 392 if (gsindex)
348 prev->gs = 0; 393 prev->gs = 0;
349 } 394 }