Diffstat (limited to 'arch/powerpc/kernel/mce.c')
-rw-r--r-- | arch/powerpc/kernel/mce.c | 352
1 files changed, 352 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000000..cadef7e64e42
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,352 @@
/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
struct irq_work mce_event_process_work = {
        .func = machine_check_process_queued_event,
};

static void mce_set_error_info(struct machine_check_event *mce,
                               struct mce_error_info *mce_err)
{
        mce->error_type = mce_err->error_type;
        switch (mce_err->error_type) {
        case MCE_ERROR_TYPE_UE:
                mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
                break;
        case MCE_ERROR_TYPE_SLB:
                mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
                break;
        case MCE_ERROR_TYPE_ERAT:
                mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
                break;
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
        }
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
                    struct mce_error_info *mce_err,
                    uint64_t addr)
{
        uint64_t srr1;
        int index = __get_cpu_var(mce_nest_count)++;
        struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);

        /*
         * Return if we don't have enough space to log mce event.
         * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
         * the check below will stop buffer overrun.
         */
        if (index >= MAX_MC_EVT)
                return;

        /* Populate generic machine check info */
        mce->version = MCE_V1;
        mce->srr0 = regs->nip;
        mce->srr1 = regs->msr;
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;

        mce->initiator = MCE_INITIATOR_CPU;
        if (handled)
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
        mce->severity = MCE_SEV_ERROR_SYNC;

        srr1 = regs->msr;

        /*
         * Populate the mce error_type and type-specific error_type.
         */
        mce_set_error_info(mce, mce_err);

        if (!addr)
                return;

        if (mce->error_type == MCE_ERROR_TYPE_TLB) {
                mce->u.tlb_error.effective_address_provided = true;
                mce->u.tlb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
                mce->u.slb_error.effective_address_provided = true;
                mce->u.slb_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
        }
        return;
}

/*
 * get_mce_event:
 *      mce     Pointer to machine_check_event structure to be filled.
 *      release Flag to indicate whether to free the event slot or not.
 *              0 <= do not release the mce event. Caller will invoke
 *                   release_mce_event() once event has been consumed.
 *              1 <= release the slot.
 *
 *      return  1 = success
 *              0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
        int index = __get_cpu_var(mce_nest_count) - 1;
        struct machine_check_event *mc_evt;
        int ret = 0;

        /* Sanity check */
        if (index < 0)
                return ret;

        /* Check if we have MCE info to process. */
        if (index < MAX_MC_EVT) {
                mc_evt = &__get_cpu_var(mce_event[index]);
                /* Copy the event structure and release the original */
                if (mce)
                        *mce = *mc_evt;
                if (release)
                        mc_evt->in_use = 0;
                ret = 1;
        }
        /* Decrement the count to free the slot. */
        if (release)
                __get_cpu_var(mce_nest_count)--;

        return ret;
}

void release_mce_event(void)
{
        get_mce_event(NULL, true);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
        int index;
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return;

        index = __get_cpu_var(mce_queue_count)++;
        /* If queue is full, just return for now. */
        if (index >= MAX_MC_EVT) {
                __get_cpu_var(mce_queue_count)--;
                return;
        }
        __get_cpu_var(mce_event_queue[index]) = evt;

        /* Queue irq work to process this event later. */
        irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE events from the mce event queue. This function will be
 * called during syscall exit.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
        int index;

        /*
         * For now just print it to console.
         * TODO: log this error event to FSP or nvram.
         */
        while (__get_cpu_var(mce_queue_count) > 0) {
                index = __get_cpu_var(mce_queue_count) - 1;
                machine_check_print_event_info(
                                &__get_cpu_var(mce_event_queue[index]));
                __get_cpu_var(mce_queue_count)--;
        }
}

void machine_check_print_event_info(struct machine_check_event *evt)
{
        const char *level, *sevstr, *subtype;
        static const char *mc_ue_types[] = {
                "Indeterminate",
                "Instruction fetch",
                "Page table walk ifetch",
                "Load/Store",
                "Page table walk Load/Store",
        };
        static const char *mc_slb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_erat_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };
        static const char *mc_tlb_types[] = {
                "Indeterminate",
                "Parity",
                "Multihit",
        };

        /* Print things out */
        if (evt->version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt->version);
                return;
        }
        switch (evt->severity) {
        case MCE_SEV_NO_ERROR:
                level = KERN_INFO;
                sevstr = "Harmless";
                break;
        case MCE_SEV_WARNING:
                level = KERN_WARNING;
                sevstr = "";
                break;
        case MCE_SEV_ERROR_SYNC:
                level = KERN_ERR;
                sevstr = "Severe";
                break;
        case MCE_SEV_FATAL:
        default:
                level = KERN_ERR;
                sevstr = "Fatal";
                break;
        }

        printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
               evt->disposition == MCE_DISPOSITION_RECOVERED ?
               "Recovered" : "Not recovered");
        printk("%s Initiator: %s\n", level,
               evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                subtype = evt->u.ue_error.ue_error_type <
                        ARRAY_SIZE(mc_ue_types) ?
                        mc_ue_types[evt->u.ue_error.ue_error_type]
                        : "Unknown";
                printk("%s Error type: UE [%s]\n", level, subtype);
                if (evt->u.ue_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.ue_error.effective_address);
                if (evt->u.ue_error.physical_address_provided)
                        printk("%s Physical address: %016llx\n",
                               level, evt->u.ue_error.physical_address);
                break;
        case MCE_ERROR_TYPE_SLB:
                subtype = evt->u.slb_error.slb_error_type <
                        ARRAY_SIZE(mc_slb_types) ?
                        mc_slb_types[evt->u.slb_error.slb_error_type]
                        : "Unknown";
                printk("%s Error type: SLB [%s]\n", level, subtype);
                if (evt->u.slb_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.slb_error.effective_address);
                break;
        case MCE_ERROR_TYPE_ERAT:
                subtype = evt->u.erat_error.erat_error_type <
                        ARRAY_SIZE(mc_erat_types) ?
                        mc_erat_types[evt->u.erat_error.erat_error_type]
                        : "Unknown";
                printk("%s Error type: ERAT [%s]\n", level, subtype);
                if (evt->u.erat_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.erat_error.effective_address);
                break;
        case MCE_ERROR_TYPE_TLB:
                subtype = evt->u.tlb_error.tlb_error_type <
                        ARRAY_SIZE(mc_tlb_types) ?
                        mc_tlb_types[evt->u.tlb_error.tlb_error_type]
                        : "Unknown";
                printk("%s Error type: TLB [%s]\n", level, subtype);
                if (evt->u.tlb_error.effective_address_provided)
                        printk("%s Effective address: %016llx\n",
                               level, evt->u.tlb_error.effective_address);
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                printk("%s Error type: Unknown\n", level);
                break;
        }
}

uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
        switch (evt->error_type) {
        case MCE_ERROR_TYPE_UE:
                if (evt->u.ue_error.effective_address_provided)
                        return evt->u.ue_error.effective_address;
                break;
        case MCE_ERROR_TYPE_SLB:
                if (evt->u.slb_error.effective_address_provided)
                        return evt->u.slb_error.effective_address;
                break;
        case MCE_ERROR_TYPE_ERAT:
                if (evt->u.erat_error.effective_address_provided)
                        return evt->u.erat_error.effective_address;
                break;
        case MCE_ERROR_TYPE_TLB:
                if (evt->u.tlb_error.effective_address_provided)
                        return evt->u.tlb_error.effective_address;
                break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                break;
        }
        return 0;
}
EXPORT_SYMBOL(get_mce_fault_addr);
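
Taken together, the file exposes a small producer/consumer API around per-CPU event buffers: a real-mode handler records an event with save_mce_event(), and a later, safer context either consumes it with get_mce_event()/release_mce_event() or defers it through machine_check_queue_event(). The sketch below is illustrative only and is not part of this commit: the two function names are made up, the platform-specific decoding step is elided, and the declarations of struct mce_error_info, MCE_EVENT_RELEASE and the mce.c entry points are assumed to live in asm/mce.h as in this patch series.

/*
 * Hypothetical caller of the mce.c API above -- for illustration only.
 */
#include <linux/types.h>
#include <linux/ptrace.h>
#include <asm/mce.h>

/* Early (real-mode) half: decode the error and stash it per CPU. */
static long example_early_machine_check(struct pt_regs *regs)
{
        struct mce_error_info mce_err = { .error_type = MCE_ERROR_TYPE_UNKNOWN };
        uint64_t addr = 0;
        long handled = 0;

        /*
         * CPU/firmware specific decoding of the error registers would go
         * here, filling in mce_err, addr and handled.  Elided.
         */

        /* Log the event into the per-CPU buffer managed by mce.c. */
        save_mce_event(regs, handled, &mce_err, addr);
        return handled;
}

/* Later, in a safer context: consume or defer the stashed event. */
static void example_consume_machine_check(void)
{
        struct machine_check_event evt;

        if (get_mce_event(&evt, MCE_EVENT_RELEASE))
                machine_check_print_event_info(&evt);
        /*
         * Alternatively, machine_check_queue_event() moves the latest event
         * onto the per-CPU queue and lets the irq_work defined in mce.c
         * print it at the next safe opportunity.
         */
}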