diff options
| -rw-r--r-- | arch/x86/kernel/process_64.c | 101 |
1 files changed, 73 insertions, 28 deletions
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3ed4a68d4013..5a2c02913af3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 283 | 283 | ||
| 284 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); | 284 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
| 285 | 285 | ||
| 286 | /* | 286 | /* Reload esp0 and ss1. */ |
| 287 | * Reload esp0, LDT and the page table pointer: | ||
| 288 | */ | ||
| 289 | load_sp0(tss, next); | 287 | load_sp0(tss, next); |
| 290 | 288 | ||
| 291 | /* | ||
| 292 | * Switch DS and ES. | ||
| 293 | * This won't pick up thread selector changes, but I guess that is ok. | ||
| 294 | */ | ||
| 295 | savesegment(es, prev->es); | ||
| 296 | if (unlikely(next->es | prev->es)) | ||
| 297 | loadsegment(es, next->es); | ||
| 298 | |||
| 299 | savesegment(ds, prev->ds); | ||
| 300 | if (unlikely(next->ds | prev->ds)) | ||
| 301 | loadsegment(ds, next->ds); | ||
| 302 | |||
| 303 | |||
| 304 | /* We must save %fs and %gs before load_TLS() because | 289 | /* We must save %fs and %gs before load_TLS() because |
| 305 | * %fs and %gs may be cleared by load_TLS(). | 290 | * %fs and %gs may be cleared by load_TLS(). |
| 306 | * | 291 | * |
| @@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 309 | savesegment(fs, fsindex); | 294 | savesegment(fs, fsindex); |
| 310 | savesegment(gs, gsindex); | 295 | savesegment(gs, gsindex); |
| 311 | 296 | ||
| 297 | /* | ||
| 298 | * Load TLS before restoring any segments so that segment loads | ||
| 299 | * reference the correct GDT entries. | ||
| 300 | */ | ||
| 312 | load_TLS(next, cpu); | 301 | load_TLS(next, cpu); |
| 313 | 302 | ||
| 314 | /* | 303 | /* |
| 315 | * Leave lazy mode, flushing any hypercalls made here. | 304 | * Leave lazy mode, flushing any hypercalls made here. This |
| 316 | * This must be done before restoring TLS segments so | 305 | * must be done after loading TLS entries in the GDT but before |
| 317 | the GDT and LDT are properly updated, and must be | 306 | * loading segments that might reference them, and it must |
| 318 | * done before math_state_restore, so the TS bit is up | 307 | * be done before math_state_restore, so the TS bit is up to |
| 319 | * to date. | 308 | * date. |
| 320 | */ | 309 | */ |
| 321 | arch_end_context_switch(next_p); | 310 | arch_end_context_switch(next_p); |
| 322 | 311 | ||
| 312 | /* Switch DS and ES. | ||
| 313 | * | ||
| 314 | * Reading them only returns the selectors, but writing them (if | ||
| 315 | * nonzero) loads the full descriptor from the GDT or LDT. The | ||
| 316 | * LDT for next is loaded in switch_mm, and the GDT is loaded | ||
| 317 | * above. | ||
| 318 | * | ||
| 319 | * We therefore need to write new values to the segment | ||
| 320 | * registers on every context switch unless both the new and old | ||
| 321 | * values are zero. | ||
| 322 | * | ||
| 323 | * Note that we don't need to do anything for CS and SS, as | ||
| 324 | * those are saved and restored as part of pt_regs. | ||
| 325 | */ | ||
| 326 | savesegment(es, prev->es); | ||
| 327 | if (unlikely(next->es | prev->es)) | ||
| 328 | loadsegment(es, next->es); | ||
| 329 | |||
| 330 | savesegment(ds, prev->ds); | ||
| 331 | if (unlikely(next->ds | prev->ds)) | ||
| 332 | loadsegment(ds, next->ds); | ||
| 333 | |||
| 323 | /* | 334 | /* |
| 324 | * Switch FS and GS. | 335 | * Switch FS and GS. |
| 325 | * | 336 | * |
| 326 | Segment register != 0 always requires a reload. Also | 337 | * These are even more complicated than DS and ES: they have |
| 327 | reload when it has changed. When prev process used 64bit | 338 | * 64-bit bases that are controlled by arch_prctl. Those bases |
| 328 | * base always reload to avoid an information leak. | 339 | * only differ from the values in the GDT or LDT if the selector |
| 340 | * is 0. | ||
| 341 | * | ||
| 342 | * Loading the segment register resets the hidden base part of | ||
| 343 | * the register to 0 or the value from the GDT / LDT. If the | ||
| 344 | * next base address is zero, writing 0 to the segment register is | ||
| 345 | * much faster than using wrmsr to explicitly zero the base. | ||
| 346 | * | ||
| 347 | * The thread_struct.fs and thread_struct.gs values are 0 | ||
| 348 | * if the fs and gs bases respectively are not overridden | ||
| 349 | * from the values implied by fsindex and gsindex. They | ||
| 350 | * are nonzero, and store the nonzero base addresses, if | ||
| 351 | * the bases are overridden. | ||
| 352 | * | ||
| 353 | * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should | ||
| 354 | * be impossible. | ||
| 355 | * | ||
| 356 | * Therefore we need to reload the segment registers if either | ||
| 357 | * the old or new selector is nonzero, and we need to override | ||
| 358 | * the base address if next thread expects it to be overridden. | ||
| 359 | * | ||
| 360 | * This code is unnecessarily slow in the case where the old and | ||
| 361 | * new indexes are zero and the new base is nonzero -- it will | ||
| 362 | * unnecessarily write 0 to the selector before writing the new | ||
| 363 | * base address. | ||
| 364 | * | ||
| 365 | * Note: This all depends on arch_prctl being the only way that | ||
| 366 | * user code can override the segment base. Once wrfsbase and | ||
| 367 | * wrgsbase are enabled, most of this code will need to change. | ||
| 329 | */ | 368 | */ |
| 330 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 369 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
| 331 | loadsegment(fs, next->fsindex); | 370 | loadsegment(fs, next->fsindex); |
| 371 | |||
| 332 | /* | 372 | /* |
| 333 | * Check if the user used a selector != 0; if yes | 373 | * If user code wrote a nonzero value to FS, then it also |
| 334 | * clear 64bit base, since overloaded base is always | 374 | * cleared the overridden base address. |
| 335 | * mapped to the Null selector | 375 | * |
| 376 | * XXX: if user code wrote 0 to FS and cleared the base | ||
| 377 | * address itself, we won't notice and we'll incorrectly | ||
| 378 | * restore the prior base address next time we reschedule | ||
| 379 | * the process. | ||
| 336 | */ | 380 | */ |
| 337 | if (fsindex) | 381 | if (fsindex) |
| 338 | prev->fs = 0; | 382 | prev->fs = 0; |
| 339 | } | 383 | } |
| 340 | /* when next process has a 64bit base use it */ | ||
| 341 | if (next->fs) | 384 | if (next->fs) |
| 342 | wrmsrl(MSR_FS_BASE, next->fs); | 385 | wrmsrl(MSR_FS_BASE, next->fs); |
| 343 | prev->fsindex = fsindex; | 386 | prev->fsindex = fsindex; |
| 344 | 387 | ||
| 345 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 388 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
| 346 | load_gs_index(next->gsindex); | 389 | load_gs_index(next->gsindex); |
| 390 | |||
| 391 | /* This works (and fails) the same way as fsindex above. */ | ||
| 347 | if (gsindex) | 392 | if (gsindex) |
| 348 | prev->gs = 0; | 393 | prev->gs = 0; |
| 349 | } | 394 | } |
