aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Ellerman <mpe@ellerman.id.au> 2018-01-09 11:07:15 -0500
committer Michael Ellerman <mpe@ellerman.id.au> 2018-01-10 05:27:06 -0500
commit aa8a5e0062ac940f7659394f4817c948dc8c0667 (patch)
tree d1c50c8888d06aa876544559cc17319d78d2f798
parent c7305645eb0c1621351cfc104038831ae87c0053 (diff)
powerpc/64s: Add support for RFI flush of L1-D cache
On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. As more information comes to light we can enable this, or other mechanisms, on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (e.g. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability cannot occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However, a cast of thousands contributed to analysis of the issue, earlier versions of the patch, backports, testing, etc. Many thanks to all of them.
Tested-by: Jon Masters <jcm@redhat.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/include/asm/exception-64s.h40
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h13
-rw-r--r--arch/powerpc/include/asm/paca.h10
-rw-r--r--arch/powerpc/include/asm/setup.h13
-rw-r--r--arch/powerpc/kernel/asm-offsets.c5
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S84
-rw-r--r--arch/powerpc/kernel/setup_64.c79
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S9
-rw-r--r--arch/powerpc/lib/feature-fixups.c41
9 files changed, 286 insertions, 8 deletions
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index dfc56daed98b..7197b179c1b1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,34 +74,58 @@
74 */ 74 */
75#define EX_R3 EX_DAR 75#define EX_R3 EX_DAR
76 76
77/* Macros for annotating the expected destination of (h)rfid */ 77/*
78 * Macros for annotating the expected destination of (h)rfid
79 *
80 * The nop instructions allow us to insert one or more instructions to flush the
81 * L1-D cache when returning to userspace or a guest.
82 */
/*
 * Three-instruction patch slot. RFI_FLUSH_FIXUP_SECTION records the location
 * of the first nop, and do_rfi_flush_fixups() rewrites all three words with
 * the selected flush sequence (they stay nops for L1D_FLUSH_NONE).
 */
83#define RFI_FLUSH_SLOT \
84 RFI_FLUSH_FIXUP_SECTION; \
85 nop; \
86 nop; \
87 nop
78 88
79#define RFI_TO_KERNEL \ 89#define RFI_TO_KERNEL \
80 rfid 90 rfid
81 91
82#define RFI_TO_USER \ 92#define RFI_TO_USER \
83 rfid 93 RFI_FLUSH_SLOT; \
94 rfid; \
95 b rfi_flush_fallback
84 96
85#define RFI_TO_USER_OR_KERNEL \ 97#define RFI_TO_USER_OR_KERNEL \
86 rfid 98 RFI_FLUSH_SLOT; \
99 rfid; \
100 b rfi_flush_fallback
87 101
88#define RFI_TO_GUEST \ 102#define RFI_TO_GUEST \
89 rfid 103 RFI_FLUSH_SLOT; \
104 rfid; \
105 b rfi_flush_fallback
90 106
91#define HRFI_TO_KERNEL \ 107#define HRFI_TO_KERNEL \
92 hrfid 108 hrfid
93 109
94#define HRFI_TO_USER \ 110#define HRFI_TO_USER \
95 hrfid 111 RFI_FLUSH_SLOT; \
112 hrfid; \
113 b hrfi_flush_fallback
96 114
97#define HRFI_TO_USER_OR_KERNEL \ 115#define HRFI_TO_USER_OR_KERNEL \
98 hrfid 116 RFI_FLUSH_SLOT; \
117 hrfid; \
118 b hrfi_flush_fallback
99 119
100#define HRFI_TO_GUEST \ 120#define HRFI_TO_GUEST \
101 hrfid 121 RFI_FLUSH_SLOT; \
122 hrfid; \
123 b hrfi_flush_fallback
102 124
103#define HRFI_TO_UNKNOWN \ 125#define HRFI_TO_UNKNOWN \
104 hrfid 126 RFI_FLUSH_SLOT; \
127 hrfid; \
128 b hrfi_flush_fallback
105 129
106#ifdef CONFIG_RELOCATABLE 130#ifdef CONFIG_RELOCATABLE
107#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ 131#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 8f88f771cc55..1e82eb3caabd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3: \
187 FTR_ENTRY_OFFSET label##1b-label##3b; \ 187 FTR_ENTRY_OFFSET label##1b-label##3b; \
188 .popsection; 188 .popsection;
189 189
/*
 * Mark the current location as an RFI flush patch site.
 *
 * Label 951 is the site itself. An entry is emitted into the
 * __rfi_flush_fixup section at label 952, built from the difference
 * 951b-952b; do_rfi_flush_fixups() adds each entry's value to the address of
 * the entry to locate the site it has to patch.
 */
190#define RFI_FLUSH_FIXUP_SECTION \
191951: \
192 .pushsection __rfi_flush_fixup,"a"; \
193 .align 2; \
194952: \
195 FTR_ENTRY_OFFSET 951b-952b; \
196 .popsection;
197
198
190#ifndef __ASSEMBLY__ 199#ifndef __ASSEMBLY__
200#include <linux/types.h>
201
202extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
203
191void apply_feature_fixups(void); 204void apply_feature_fixups(void);
192void setup_feature_keys(void); 205void setup_feature_keys(void);
193#endif 206#endif
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3892db93b837..23ac7fc0af23 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -232,6 +232,16 @@ struct paca_struct {
232 struct sibling_subcore_state *sibling_subcore_state; 232 struct sibling_subcore_state *sibling_subcore_state;
233#endif 233#endif
234#endif 234#endif
235#ifdef CONFIG_PPC_BOOK3S_64
236 /*
237 * rfi fallback flush must be in its own cacheline to prevent
238 * other paca data leaking into the L1d
239 */
240 u64 exrfi[EX_SIZE] __aligned(0x80);
241 void *rfi_flush_fallback_area;
242 u64 l1d_flush_congruence;
243 u64 l1d_flush_sets;
244#endif
235}; 245};
236 246
237extern void copy_mm_to_paca(struct mm_struct *mm); 247extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index cf00ec26303a..469b7fdc9be4 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
39static inline void pseries_little_endian_exceptions(void) {} 39static inline void pseries_little_endian_exceptions(void) {}
40#endif /* CONFIG_PPC_PSERIES */ 40#endif /* CONFIG_PPC_PSERIES */
41 41
42void rfi_flush_enable(bool enable);
43
44/* These are bit flags */
45enum l1d_flush_type {
46 L1D_FLUSH_NONE = 0x1, /* patch slots are left as three nops */
47 L1D_FLUSH_FALLBACK = 0x2, /* slot branches to the software displacement flush */
48 L1D_FLUSH_ORI = 0x4, /* slot gets the ori 31,31,0 / ori 30,30,0 pair */
49 L1D_FLUSH_MTTRIG = 0x8, /* slot gets mtspr TRIG2,r0 */
50};
51
52void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
53void do_rfi_flush_fixups(enum l1d_flush_type types);
54
42#endif /* !__ASSEMBLY__ */ 55#endif /* !__ASSEMBLY__ */
43 56
44#endif /* _ASM_POWERPC_SETUP_H */ 57#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6b958414b4e0..f390d57cf2e1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -237,6 +237,11 @@ int main(void)
237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); 237 OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
238 OFFSET(PACA_IN_MCE, paca_struct, in_mce); 238 OFFSET(PACA_IN_MCE, paca_struct, in_mce);
239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi); 239 OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
240 OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
241 OFFSET(PACA_EXRFI, paca_struct, exrfi);
242 OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
243 OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
244
240#endif 245#endif
241 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); 246 OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
242 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); 247 OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ed356194f09c..2dc10bf646b8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1449,6 +1449,90 @@ masked_##_H##interrupt: \
1449 b .; \ 1449 b .; \
1450 MASKED_DEC_HANDLER(_H) 1450 MASKED_DEC_HANDLER(_H)
1451 1451
/*
 * Software displacement flush of the L1-D cache, finishing with rfid.
 *
 * r13 is parked in SCRATCH0 and replaced by the PACA pointer; r9-r12 and r8
 * are saved in the PACA exrfi area and CTR is kept in r9. The loop then runs
 * PACA_L1D_FLUSH_SETS times: each pass issues 8 loads from the fallback area
 * (address read from the PACA), stepping by the congruence value plus 8
 * between loads, and advances the base by 128 bytes. All saved state is
 * restored before the final rfid.
 */
1452TRAMP_REAL_BEGIN(rfi_flush_fallback)
1453 SET_SCRATCH0(r13);
1454 GET_PACA(r13);
1455 std r9,PACA_EXRFI+EX_R9(r13)
1456 std r10,PACA_EXRFI+EX_R10(r13)
1457 std r11,PACA_EXRFI+EX_R11(r13)
1458 std r12,PACA_EXRFI+EX_R12(r13)
1459 std r8,PACA_EXRFI+EX_R13(r13) /* r8 is stashed in the EX_R13 slot */
1460 mfctr r9 /* CTR is used as the loop counter below, keep the old value */
1461 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1462 ld r11,PACA_L1D_FLUSH_SETS(r13)
1463 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1464 /*
1465 * The load addresses are at staggered offsets within cachelines,
1466 * which suits some pipelines better (on others it should not
1467 * hurt).
1468 */
1469 addi r12,r12,8
1470 mtctr r11
1471 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1472
1473 /* order ld/st prior to dcbt stop all streams with flushing */
1474 sync
14751: li r8,0
1476 .rept 8 /* 8-way set associative */
1477 ldx r11,r10,r8
1478 add r8,r8,r12
1479 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1480 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1481 .endr
1482 addi r10,r10,128 /* 128 byte cache line */
1483 bdnz 1b
1484
1485 mtctr r9 /* restore the original CTR */
1486 ld r9,PACA_EXRFI+EX_R9(r13)
1487 ld r10,PACA_EXRFI+EX_R10(r13)
1488 ld r11,PACA_EXRFI+EX_R11(r13)
1489 ld r12,PACA_EXRFI+EX_R12(r13)
1490 ld r8,PACA_EXRFI+EX_R13(r13)
1491 GET_SCRATCH0(r13);
1492 rfid
1493
/*
 * Software displacement flush of the L1-D cache, finishing with hrfid.
 *
 * Same sequence as rfi_flush_fallback except for the final return
 * instruction: r13 is parked in SCRATCH0 and replaced by the PACA pointer;
 * r9-r12 and r8 are saved in the PACA exrfi area and CTR is kept in r9. The
 * loop runs PACA_L1D_FLUSH_SETS times: each pass issues 8 loads from the
 * fallback area (address read from the PACA), stepping by the congruence
 * value plus 8 between loads, and advances the base by 128 bytes. All saved
 * state is restored before the final hrfid.
 */
1494TRAMP_REAL_BEGIN(hrfi_flush_fallback)
1495 SET_SCRATCH0(r13);
1496 GET_PACA(r13);
1497 std r9,PACA_EXRFI+EX_R9(r13)
1498 std r10,PACA_EXRFI+EX_R10(r13)
1499 std r11,PACA_EXRFI+EX_R11(r13)
1500 std r12,PACA_EXRFI+EX_R12(r13)
1501 std r8,PACA_EXRFI+EX_R13(r13) /* r8 is stashed in the EX_R13 slot */
1502 mfctr r9 /* CTR is used as the loop counter below, keep the old value */
1503 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
1504 ld r11,PACA_L1D_FLUSH_SETS(r13)
1505 ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
1506 /*
1507 * The load addresses are at staggered offsets within cachelines,
1508 * which suits some pipelines better (on others it should not
1509 * hurt).
1510 */
1511 addi r12,r12,8
1512 mtctr r11
1513 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
1514
1515 /* order ld/st prior to dcbt stop all streams with flushing */
1516 sync
15171: li r8,0
1518 .rept 8 /* 8-way set associative */
1519 ldx r11,r10,r8
1520 add r8,r8,r12
1521 xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
1522 add r8,r8,r11 // Add 0, this creates a dependency on the ldx
1523 .endr
1524 addi r10,r10,128 /* 128 byte cache line */
1525 bdnz 1b
1526
1527 mtctr r9 /* restore the original CTR */
1528 ld r9,PACA_EXRFI+EX_R9(r13)
1529 ld r10,PACA_EXRFI+EX_R10(r13)
1530 ld r11,PACA_EXRFI+EX_R11(r13)
1531 ld r12,PACA_EXRFI+EX_R12(r13)
1532 ld r8,PACA_EXRFI+EX_R13(r13)
1533 GET_SCRATCH0(r13);
1534 hrfid
1535
1452/* 1536/*
1453 * Real mode exceptions actually use this too, but alternate 1537 * Real mode exceptions actually use this too, but alternate
1454 * instruction code patches (which end up in the common .text area) 1538 * instruction code patches (which end up in the common .text area)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8956a9856604..96163f4c3673 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -801,3 +801,82 @@ static int __init disable_hardlockup_detector(void)
801 return 0; 801 return 0;
802} 802}
803early_initcall(disable_hardlockup_detector); 803early_initcall(disable_hardlockup_detector);
804
805#ifdef CONFIG_PPC_BOOK3S_64
806static enum l1d_flush_type enabled_flush_types;
807static void *l1d_flush_fallback_area;
808bool rfi_flush;
809
/*
 * Empty callback handed to on_each_cpu() by rfi_flush_enable(); the argument
 * is ignored.
 */
810static void do_nothing(void *unused)
811{
812 /*
813 * We don't need to do the flush explicitly, just enter+exit kernel is
814 * sufficient, the RFI exit handlers will do the right thing.
815 */
816}
817
/*
 * Switch the RFI L1-D flush on or off by re-patching the flush slots.
 *
 * @enable: true patches in the sequences for enabled_flush_types and then
 *          runs do_nothing() on every CPU; false patches the slots with
 *          L1D_FLUSH_NONE.
 *
 * Returns early without patching when the requested state already matches
 * rfi_flush. rfi_flush is updated only after the patching is done.
 */
818void rfi_flush_enable(bool enable)
819{
820 if (rfi_flush == enable)
821 return;
822
823 if (enable) {
824 do_rfi_flush_fixups(enabled_flush_types);
	/* NOTE(review): last argument 1 presumably means wait for completion - confirm */
825 on_each_cpu(do_nothing, NULL, 1);
826 } else
827 do_rfi_flush_fixups(L1D_FLUSH_NONE);
828
829 rfi_flush = enable;
830}
831
/*
 * Set up the buffer and geometry used by the software displacement flush.
 *
 * Allocates 2 x L1d size bytes, aligned to the L1d size and below
 * min(safe_stack_limit(), ppc64_rma_size), zeroes the buffer, and stores its
 * address plus the congruence (L1d size / 8) and set count (congruence / 128)
 * in the paca of every possible CPU. All CPUs share the same buffer.
 *
 * NOTE(review): the result of memblock_alloc_base() is used unchecked here;
 * presumably that helper does not return on failure - confirm.
 */
832static void init_fallback_flush(void)
833{
834 u64 l1d_size, limit;
835 int cpu;
836
837 l1d_size = ppc64_caches.l1d.size;
838 limit = min(safe_stack_limit(), ppc64_rma_size);
839
840 /*
841 * Align to L1d size, and size it at 2x L1d size, to catch possible
842 * hardware prefetch runoff. We don't have a recipe for load patterns to
843 * reliably avoid the prefetcher.
844 */
845 l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
846 memset(l1d_flush_fallback_area, 0, l1d_size * 2);
847
848 for_each_possible_cpu(cpu) {
849 /*
850 * The fallback flush is currently coded for 8-way
851 * associativity. Different associativity is possible, but it
852 * will be treated as 8-way and may not evict the lines as
853 * effectively.
854 *
855 * 128 byte lines are mandatory.
856 */
857 u64 c = l1d_size / 8;
858
859 paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
860 paca[cpu].l1d_flush_congruence = c;
861 paca[cpu].l1d_flush_sets = c / 128;
862 }
863}
864
/*
 * Boot-time entry point that selects the RFI flush mechanism(s).
 *
 * @types:  bit mask of enum l1d_flush_type values. Each selected type is
 *          logged; L1D_FLUSH_FALLBACK additionally triggers
 *          init_fallback_flush().
 * @enable: passed on to rfi_flush_enable() once @types has been recorded in
 *          enabled_flush_types.
 */
865void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
866{
867 if (types & L1D_FLUSH_FALLBACK) {
868 pr_info("rfi-flush: Using fallback displacement flush\n");
869 init_fallback_flush();
870 }
871
872 if (types & L1D_FLUSH_ORI)
873 pr_info("rfi-flush: Using ori type flush\n");
874
875 if (types & L1D_FLUSH_MTTRIG)
876 pr_info("rfi-flush: Using mttrig type flush\n");
877
878 enabled_flush_types = types;
879
880 rfi_flush_enable(enable);
881}
882#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0494e1566ee2..307843d23682 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
132 /* Read-only data */ 132 /* Read-only data */
133 RO_DATA(PAGE_SIZE) 133 RO_DATA(PAGE_SIZE)
134 134
135#ifdef CONFIG_PPC64
/*
 * Output section gathering every __rfi_flush_fixup input section (emitted by
 * RFI_FLUSH_FIXUP_SECTION), bracketed by the __start___rfi_flush_fixup and
 * __stop___rfi_flush_fixup symbols that do_rfi_flush_fixups() iterates over.
 */
136 . = ALIGN(8);
137 __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
138 __start___rfi_flush_fixup = .;
139 *(__rfi_flush_fixup)
140 __stop___rfi_flush_fixup = .;
141 }
142#endif
143
135 EXCEPTION_TABLE(0) 144 EXCEPTION_TABLE(0)
136 145
137 NOTES :kernel :notes 146 NOTES :kernel :notes
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 41cf5ae273cf..a95ea007d654 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
116 } 116 }
117} 117}
118 118
119#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Patch every recorded RFI flush slot with the sequence for @types.
 *
 * @types: bit mask of enum l1d_flush_type values.
 *
 * A three-word sequence is built starting from three nops. The fallback
 * branch, if selected, goes in word 0 first; instruction-based flushes
 * (ori pair, then mtspr) are then written from word 0 upwards, so they
 * overwrite the fallback branch when both are selected. The same three words
 * are written to each site listed between __start___rfi_flush_fixup and
 * __stop___rfi_flush_fixup, and the number of sites is logged.
 */
120void do_rfi_flush_fixups(enum l1d_flush_type types)
121{
122 unsigned int instrs[3], *dest;
123 long *start, *end;
124 int i;
125
126 start = PTRRELOC(&__start___rfi_flush_fixup);
127 end = PTRRELOC(&__stop___rfi_flush_fixup);
128
129 instrs[0] = 0x60000000; /* nop */
130 instrs[1] = 0x60000000; /* nop */
131 instrs[2] = 0x60000000; /* nop */
132
133 if (types & L1D_FLUSH_FALLBACK)
134 /* b .+16 to fallback flush */
135 instrs[0] = 0x48000010;
136
137 i = 0;
138 if (types & L1D_FLUSH_ORI) {
139 instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
140 instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
141 }
142
143 if (types & L1D_FLUSH_MTTRIG)
144 instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
145
	/* i is reused here as the count of patched sites */
146 for (i = 0; start < end; start++, i++) {
	/* each entry holds the offset from the entry itself to the patch site */
147 dest = (void *)start + *start;
148
149 pr_devel("patching dest %lx\n", (unsigned long)dest);
150
151 patch_instruction(dest, instrs[0]);
152 patch_instruction(dest + 1, instrs[1]);
153 patch_instruction(dest + 2, instrs[2]);
154 }
155
156 printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
157}
158#endif /* CONFIG_PPC_BOOK3S_64 */
159
119void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) 160void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
120{ 161{
121 long *start, *end; 162 long *start, *end;