diff options
| author | Joerg Roedel <jroedel@suse.de> | 2018-08-07 06:24:31 -0400 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2018-08-07 17:36:02 -0400 |
| commit | 16a3fe634f6a568c6234b8747e5d50487fed3526 (patch) | |
| tree | 57022662196020a7186f1257259215570cdfa878 | |
| parent | 30514effc9206d4e084ec32239ae221db157d43a (diff) | |
x86/mm/pti: Clone kernel-image on PTE level for 32 bit
On 32 bit the kernel sections are not huge-page aligned. When we clone
them on PMD-level we inevitably map some areas that are normal kernel
memory and may contain secrets to user-space. To prevent that we need to
clone the kernel-image on PTE-level for 32 bit.
Also make the page-table cloning code more general so that it can handle
PMD and PTE level cloning. This can be generalized further in the future to
also handle clones on the P4D-level.
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: linux-mm@kvack.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Waiman Long <llong@redhat.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "David H . Gutteridge" <dhgutteridge@sympatico.ca>
Cc: joro@8bytes.org
Link: https://lkml.kernel.org/r/1533637471-30953-4-git-send-email-joro@8bytes.org
| -rw-r--r-- | arch/x86/mm/pti.c | 140 |
1 file changed, 99 insertions, 41 deletions
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 5164c987b1f1..1dc5c683e7a5 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c | |||
| @@ -54,6 +54,16 @@ | |||
| 54 | #define __GFP_NOTRACK 0 | 54 | #define __GFP_NOTRACK 0 |
| 55 | #endif | 55 | #endif |
| 56 | 56 | ||
| 57 | /* | ||
| 58 | * Define the page-table levels we clone for user-space on 32 | ||
| 59 | * and 64 bit. | ||
| 60 | */ | ||
| 61 | #ifdef CONFIG_X86_64 | ||
| 62 | #define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PMD | ||
| 63 | #else | ||
| 64 | #define PTI_LEVEL_KERNEL_IMAGE PTI_CLONE_PTE | ||
| 65 | #endif | ||
| 66 | |||
| 57 | static void __init pti_print_if_insecure(const char *reason) | 67 | static void __init pti_print_if_insecure(const char *reason) |
| 58 | { | 68 | { |
| 59 | if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) | 69 | if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) |
| @@ -228,7 +238,6 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) | |||
| 228 | return pmd_offset(pud, address); | 238 | return pmd_offset(pud, address); |
| 229 | } | 239 | } |
| 230 | 240 | ||
| 231 | #ifdef CONFIG_X86_VSYSCALL_EMULATION | ||
| 232 | /* | 241 | /* |
| 233 | * Walk the shadow copy of the page tables (optionally) trying to allocate | 242 | * Walk the shadow copy of the page tables (optionally) trying to allocate |
| 234 | * page table pages on the way down. Does not support large pages. | 243 | * page table pages on the way down. Does not support large pages. |
| @@ -270,6 +279,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) | |||
| 270 | return pte; | 279 | return pte; |
| 271 | } | 280 | } |
| 272 | 281 | ||
| 282 | #ifdef CONFIG_X86_VSYSCALL_EMULATION | ||
| 273 | static void __init pti_setup_vsyscall(void) | 283 | static void __init pti_setup_vsyscall(void) |
| 274 | { | 284 | { |
| 275 | pte_t *pte, *target_pte; | 285 | pte_t *pte, *target_pte; |
| @@ -290,8 +300,14 @@ static void __init pti_setup_vsyscall(void) | |||
| 290 | static void __init pti_setup_vsyscall(void) { } | 300 | static void __init pti_setup_vsyscall(void) { } |
| 291 | #endif | 301 | #endif |
| 292 | 302 | ||
| 303 | enum pti_clone_level { | ||
| 304 | PTI_CLONE_PMD, | ||
| 305 | PTI_CLONE_PTE, | ||
| 306 | }; | ||
| 307 | |||
| 293 | static void | 308 | static void |
| 294 | pti_clone_pmds(unsigned long start, unsigned long end) | 309 | pti_clone_pgtable(unsigned long start, unsigned long end, |
| 310 | enum pti_clone_level level) | ||
| 295 | { | 311 | { |
| 296 | unsigned long addr; | 312 | unsigned long addr; |
| 297 | 313 | ||
| @@ -299,7 +315,8 @@ pti_clone_pmds(unsigned long start, unsigned long end) | |||
| 299 | * Clone the populated PMDs which cover start to end. These PMD areas | 315 | * Clone the populated PMDs which cover start to end. These PMD areas |
| 300 | * can have holes. | 316 | * can have holes. |
| 301 | */ | 317 | */ |
| 302 | for (addr = start; addr < end; addr += PMD_SIZE) { | 318 | for (addr = start; addr < end;) { |
| 319 | pte_t *pte, *target_pte; | ||
| 303 | pmd_t *pmd, *target_pmd; | 320 | pmd_t *pmd, *target_pmd; |
| 304 | pgd_t *pgd; | 321 | pgd_t *pgd; |
| 305 | p4d_t *p4d; | 322 | p4d_t *p4d; |
| @@ -315,44 +332,84 @@ pti_clone_pmds(unsigned long start, unsigned long end) | |||
| 315 | p4d = p4d_offset(pgd, addr); | 332 | p4d = p4d_offset(pgd, addr); |
| 316 | if (WARN_ON(p4d_none(*p4d))) | 333 | if (WARN_ON(p4d_none(*p4d))) |
| 317 | return; | 334 | return; |
| 335 | |||
| 318 | pud = pud_offset(p4d, addr); | 336 | pud = pud_offset(p4d, addr); |
| 319 | if (pud_none(*pud)) | 337 | if (pud_none(*pud)) { |
| 338 | addr += PUD_SIZE; | ||
| 320 | continue; | 339 | continue; |
| 340 | } | ||
| 341 | |||
| 321 | pmd = pmd_offset(pud, addr); | 342 | pmd = pmd_offset(pud, addr); |
| 322 | if (pmd_none(*pmd)) | 343 | if (pmd_none(*pmd)) { |
| 344 | addr += PMD_SIZE; | ||
| 323 | continue; | 345 | continue; |
| 346 | } | ||
| 324 | 347 | ||
| 325 | target_pmd = pti_user_pagetable_walk_pmd(addr); | 348 | if (pmd_large(*pmd) || level == PTI_CLONE_PMD) { |
| 326 | if (WARN_ON(!target_pmd)) | 349 | target_pmd = pti_user_pagetable_walk_pmd(addr); |
| 327 | return; | 350 | if (WARN_ON(!target_pmd)) |
| 328 | 351 | return; | |
| 329 | /* | 352 | |
| 330 | * Only clone present PMDs. This ensures only setting | 353 | /* |
| 331 | * _PAGE_GLOBAL on present PMDs. This should only be | 354 | * Only clone present PMDs. This ensures only setting |
| 332 | * called on well-known addresses anyway, so a non- | 355 | * _PAGE_GLOBAL on present PMDs. This should only be |
| 333 | * present PMD would be a surprise. | 356 | * called on well-known addresses anyway, so a non- |
| 334 | */ | 357 | * present PMD would be a surprise. |
| 335 | if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT))) | 358 | */ |
| 336 | return; | 359 | if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT))) |
| 337 | 360 | return; | |
| 338 | /* | 361 | |
| 339 | * Setting 'target_pmd' below creates a mapping in both | 362 | /* |
| 340 | * the user and kernel page tables. It is effectively | 363 | * Setting 'target_pmd' below creates a mapping in both |
| 341 | * global, so set it as global in both copies. Note: | 364 | * the user and kernel page tables. It is effectively |
| 342 | * the X86_FEATURE_PGE check is not _required_ because | 365 | * global, so set it as global in both copies. Note: |
| 343 | * the CPU ignores _PAGE_GLOBAL when PGE is not | 366 | * the X86_FEATURE_PGE check is not _required_ because |
| 344 | * supported. The check keeps consistentency with | 367 | * the CPU ignores _PAGE_GLOBAL when PGE is not |
| 345 | * code that only set this bit when supported. | 368 | * supported. The check keeps consistentency with |
| 346 | */ | 369 | * code that only set this bit when supported. |
| 347 | if (boot_cpu_has(X86_FEATURE_PGE)) | 370 | */ |
| 348 | *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); | 371 | if (boot_cpu_has(X86_FEATURE_PGE)) |
| 349 | 372 | *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); | |
| 350 | /* | 373 | |
| 351 | * Copy the PMD. That is, the kernelmode and usermode | 374 | /* |
| 352 | * tables will share the last-level page tables of this | 375 | * Copy the PMD. That is, the kernelmode and usermode |
| 353 | * address range | 376 | * tables will share the last-level page tables of this |
| 354 | */ | 377 | * address range |
| 355 | *target_pmd = *pmd; | 378 | */ |
| 379 | *target_pmd = *pmd; | ||
| 380 | |||
| 381 | addr += PMD_SIZE; | ||
| 382 | |||
| 383 | } else if (level == PTI_CLONE_PTE) { | ||
| 384 | |||
| 385 | /* Walk the page-table down to the pte level */ | ||
| 386 | pte = pte_offset_kernel(pmd, addr); | ||
| 387 | if (pte_none(*pte)) { | ||
| 388 | addr += PAGE_SIZE; | ||
| 389 | continue; | ||
| 390 | } | ||
| 391 | |||
| 392 | /* Only clone present PTEs */ | ||
| 393 | if (WARN_ON(!(pte_flags(*pte) & _PAGE_PRESENT))) | ||
| 394 | return; | ||
| 395 | |||
| 396 | /* Allocate PTE in the user page-table */ | ||
| 397 | target_pte = pti_user_pagetable_walk_pte(addr); | ||
| 398 | if (WARN_ON(!target_pte)) | ||
| 399 | return; | ||
| 400 | |||
| 401 | /* Set GLOBAL bit in both PTEs */ | ||
| 402 | if (boot_cpu_has(X86_FEATURE_PGE)) | ||
| 403 | *pte = pte_set_flags(*pte, _PAGE_GLOBAL); | ||
| 404 | |||
| 405 | /* Clone the PTE */ | ||
| 406 | *target_pte = *pte; | ||
| 407 | |||
| 408 | addr += PAGE_SIZE; | ||
| 409 | |||
| 410 | } else { | ||
| 411 | BUG(); | ||
| 412 | } | ||
| 356 | } | 413 | } |
| 357 | } | 414 | } |
| 358 | 415 | ||
| @@ -398,7 +455,7 @@ static void __init pti_clone_user_shared(void) | |||
| 398 | start = CPU_ENTRY_AREA_BASE; | 455 | start = CPU_ENTRY_AREA_BASE; |
| 399 | end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); | 456 | end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); |
| 400 | 457 | ||
| 401 | pti_clone_pmds(start, end); | 458 | pti_clone_pgtable(start, end, PTI_CLONE_PMD); |
| 402 | } | 459 | } |
| 403 | #endif /* CONFIG_X86_64 */ | 460 | #endif /* CONFIG_X86_64 */ |
| 404 | 461 | ||
| @@ -417,8 +474,9 @@ static void __init pti_setup_espfix64(void) | |||
| 417 | */ | 474 | */ |
| 418 | static void pti_clone_entry_text(void) | 475 | static void pti_clone_entry_text(void) |
| 419 | { | 476 | { |
| 420 | pti_clone_pmds((unsigned long) __entry_text_start, | 477 | pti_clone_pgtable((unsigned long) __entry_text_start, |
| 421 | (unsigned long) __irqentry_text_end); | 478 | (unsigned long) __irqentry_text_end, |
| 479 | PTI_CLONE_PMD); | ||
| 422 | } | 480 | } |
| 423 | 481 | ||
| 424 | /* | 482 | /* |
| @@ -500,10 +558,10 @@ static void pti_clone_kernel_text(void) | |||
| 500 | * pti_set_kernel_image_nonglobal() did to clear the | 558 | * pti_set_kernel_image_nonglobal() did to clear the |
| 501 | * global bit. | 559 | * global bit. |
| 502 | */ | 560 | */ |
| 503 | pti_clone_pmds(start, end_clone); | 561 | pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); |
| 504 | 562 | ||
| 505 | /* | 563 | /* |
| 506 | * pti_clone_pmds() will set the global bit in any PMDs | 564 | * pti_clone_pgtable() will set the global bit in any PMDs |
| 507 | * that it clones, but we also need to get any PTEs in | 565 | * that it clones, but we also need to get any PTEs in |
| 508 | * the last level for areas that are not huge-page-aligned. | 566 | * the last level for areas that are not huge-page-aligned. |
| 509 | */ | 567 | */ |
