summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2019-08-28 18:31:34 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2019-08-29 14:48:44 -0400
commit 7af0145067bc429a09ac4047b167c0971c9f0dc7 (patch)
tree 1ef61334817f6d15bb5739c509ec05bccc8745d9
parent 42e0e95474fc6076b5cd68cab8fa0340a1797a72 (diff)
x86/mm/cpa: Prevent large page split when ftrace flips RW on kernel text
ftrace does not use text_poke() for enabling trace functionality. It uses its own mechanism and flips the whole kernel text to RW and back to RO.

The CPA rework removed a loop-based check of 4k pages which tried to preserve a large page by checking, for each 4k page, whether the change would actually cover all pages in the large page. This resulted in endless loops for nothing, as testing showed that it never actually preserved anything. Of course, testing failed to include ftrace, which is the one and only case that benefited from the 4k loop.

As a consequence, enabling function tracing or ftrace-based kprobes results in a full 4k split of the kernel text, which affects iTLB performance.

The kernel RO protection is the only valid case where this can actually preserve large pages. All other static protections (RO data, data NX, PCI, BIOS) are truly static. So a conflict with those protections which results in a split should only ever happen when a change of memory next to a protected region is attempted. But these conflicts rightfully split the large page to preserve the protected regions. In fact, a change to the protected regions themselves is a bug and is warned about.

Add an exception to the static protection check for kernel text RO when the region to be changed spans a full large page, which allows the large mappings to be preserved. This also prevents the syslog from being spammed with CPA violations when ftrace is used.

The exception needs to be removed once ftrace has switched over to text_poke(), which avoids the whole issue.

Fixes: 585948f4f695 ("x86/mm/cpa: Avoid the 4k pages check completely")
Reported-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Song Liu <songliubraving@fb.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908282355340.1938@nanos.tec.linutronix.de
-rw-r--r--arch/x86/mm/pageattr.c26
1 files changed, 18 insertions, 8 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6a9a77a403c9..e14e95ea7338 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
516 */ 516 */
517static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, 517static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
518 unsigned long pfn, unsigned long npg, 518 unsigned long pfn, unsigned long npg,
519 int warnlvl) 519 unsigned long lpsize, int warnlvl)
520{ 520{
521 pgprotval_t forbidden, res; 521 pgprotval_t forbidden, res;
522 unsigned long end; 522 unsigned long end;
@@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
535 check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX"); 535 check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
536 forbidden = res; 536 forbidden = res;
537 537
538 res = protect_kernel_text_ro(start, end); 538 /*
539 check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO"); 539 * Special case to preserve a large page. If the change spawns the
540 forbidden |= res; 540 * full large page mapping then there is no point to split it
541 * up. Happens with ftrace and is going to be removed once ftrace
542 * switched to text_poke().
543 */
544 if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
545 res = protect_kernel_text_ro(start, end);
546 check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
547 forbidden |= res;
548 }
541 549
542 /* Check the PFN directly */ 550 /* Check the PFN directly */
543 res = protect_pci_bios(pfn, pfn + npg - 1); 551 res = protect_pci_bios(pfn, pfn + npg - 1);
@@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
819 * extra conditional required here. 827 * extra conditional required here.
820 */ 828 */
821 chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages, 829 chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
822 CPA_CONFLICT); 830 psize, CPA_CONFLICT);
823 831
824 if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) { 832 if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
825 /* 833 /*
@@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
855 * protection requirement in the large page. 863 * protection requirement in the large page.
856 */ 864 */
857 new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, 865 new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
858 CPA_DETECT); 866 psize, CPA_DETECT);
859 867
860 /* 868 /*
861 * If there is a conflict, split the large page. 869 * If there is a conflict, split the large page.
@@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
906 if (!cpa->force_static_prot) 914 if (!cpa->force_static_prot)
907 goto set; 915 goto set;
908 916
909 prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT); 917 /* Hand in lpsize = 0 to enforce the protection mechanism */
918 prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
910 919
911 if (pgprot_val(prot) == pgprot_val(ref_prot)) 920 if (pgprot_val(prot) == pgprot_val(ref_prot))
912 goto set; 921 goto set;
@@ -1503,7 +1512,8 @@ repeat:
1503 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 1512 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
1504 1513
1505 cpa_inc_4k_install(); 1514 cpa_inc_4k_install();
1506 new_prot = static_protections(new_prot, address, pfn, 1, 1515 /* Hand in lpsize = 0 to enforce the protection mechanism */
1516 new_prot = static_protections(new_prot, address, pfn, 1, 0,
1507 CPA_PROTECT); 1517 CPA_PROTECT);
1508 1518
1509 new_prot = pgprot_clear_protnone_bits(new_prot); 1519 new_prot = pgprot_clear_protnone_bits(new_prot);