aboutsummaryrefslogtreecommitdiffstats
path: root/include/ras
diff options
context:
space:
mode:
author Xie XiuQi <xiexiuqi@huawei.com> 2015-06-24 19:57:36 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-06-24 20:49:43 -0400
commit 97f0b13452198290799fd6780f05fbaa74f927d3 (patch)
tree 06401ca906140b76ed4206968a412eb860bd65c0 /include/ras
parent cc3e2af42e7b7e0457b93bf17c19b44c635cd40c (diff)
tracing: add trace event for memory-failure
RAS user space tools like rasdaemon, which are based on trace events, can receive MCE error events but no memory recovery result event. So I want to add this event to make this scenario complete.

This patch adds an event to the ras group for memory-failure.

The output looks like below:

# tracer: nop
#
# entries-in-buffer/entries-written: 2/2   #P:24
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
      mce-inject-13150 [001] ....   277.019359: memory_failure_event: pfn 0x19869: recovery action for free buddy page: Delayed

[xiexiuqi@huawei.com: fix build error]
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Chen Gong <gong.chen@linux.intel.com>
Cc: Jim Davis <jim.epost@gmail.com>
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/ras')
-rw-r--r-- include/ras/ras_event.h 85
1 files changed, 85 insertions, 0 deletions
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 79abb9c71772..1443d79e4fe6 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -11,6 +11,7 @@
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/aer.h> 12#include <linux/aer.h>
13#include <linux/cper.h> 13#include <linux/cper.h>
14#include <linux/mm.h>
14 15
15/* 16/*
16 * MCE Extended Error Log trace event 17 * MCE Extended Error Log trace event
@@ -232,6 +233,90 @@ TRACE_EVENT(aer_event,
232 __print_flags(__entry->status, "|", aer_uncorrectable_errors)) 233 __print_flags(__entry->status, "|", aer_uncorrectable_errors))
233); 234);
234 235
236/*
237 * memory-failure recovery action result event
238 *
239 * unsigned long pfn - Page Frame Number of the corrupted page
240 * int type - Page type of the corrupted page
241 * int result - Result of recovery action
242 */
243
244#ifdef CONFIG_MEMORY_FAILURE
245#define MF_ACTION_RESULT \
246 EM ( MF_IGNORED, "Ignored" ) \
247 EM ( MF_FAILED, "Failed" ) \
248 EM ( MF_DELAYED, "Delayed" ) \
249 EMe ( MF_RECOVERED, "Recovered" )
250
251#define MF_PAGE_TYPE \
252 EM ( MF_MSG_KERNEL, "reserved kernel page" ) \
253 EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \
254 EM ( MF_MSG_SLAB, "kernel slab page" ) \
255 EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
256 EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \
257 EM ( MF_MSG_HUGE, "huge page" ) \
258 EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
259 EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \
260 EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \
261 EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \
262 EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \
263 EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \
264 EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \
265 EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \
266 EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \
267 EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \
268 EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \
269 EM ( MF_MSG_BUDDY, "free buddy page" ) \
270 EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \
271 EMe ( MF_MSG_UNKNOWN, "unknown page" )
272
273/*
274 * First define the enums in MF_ACTION_RESULT and MF_PAGE_TYPE to be
275 * exported to userspace via TRACE_DEFINE_ENUM().
276 */
277#undef EM
278#undef EMe
279#define EM(a, b) TRACE_DEFINE_ENUM(a);
280#define EMe(a, b) TRACE_DEFINE_ENUM(a);
281
282MF_ACTION_RESULT
283MF_PAGE_TYPE
284
285/*
286 * Now redefine the EM() and EMe() macros to map the enums to the strings
287 * that will be printed in the output.
288 */
289#undef EM
290#undef EMe
291#define EM(a, b) { a, b },
292#define EMe(a, b) { a, b }
293
294TRACE_EVENT(memory_failure_event,
295 TP_PROTO(unsigned long pfn,
296 int type,
297 int result),
298
299 TP_ARGS(pfn, type, result),
300
301 TP_STRUCT__entry(
302 __field(unsigned long, pfn)
303 __field(int, type)
304 __field(int, result)
305 ),
306
307 TP_fast_assign(
308 __entry->pfn = pfn;
309 __entry->type = type;
310 __entry->result = result;
311 ),
312
313 TP_printk("pfn %#lx: recovery action for %s: %s",
314 __entry->pfn,
315 __print_symbolic(__entry->type, MF_PAGE_TYPE),
316 __print_symbolic(__entry->result, MF_ACTION_RESULT)
317 )
318);
319#endif /* CONFIG_MEMORY_FAILURE */
235#endif /* _TRACE_HW_EVENT_MC_H */ 320#endif /* _TRACE_HW_EVENT_MC_H */
236 321
237/* This part must be outside protection */ 322/* This part must be outside protection */