Commit | Line | Data |
---|---|---|
53f2d028 MCC |
1 | #undef TRACE_SYSTEM |
2 | #define TRACE_SYSTEM ras | |
3 | #define TRACE_INCLUDE_FILE ras_event | |
4 | ||
5 | #if !defined(_TRACE_HW_EVENT_MC_H) || defined(TRACE_HEADER_MULTI_READ) | |
6 | #define _TRACE_HW_EVENT_MC_H | |
7 | ||
8 | #include <linux/tracepoint.h> | |
9 | #include <linux/edac.h> | |
10 | #include <linux/ktime.h> | |
0a2409aa | 11 | #include <linux/aer.h> |
53f2d028 MCC |
12 | |
13 | /* | |
14 | * Hardware Events Report | |
15 | * | |
16 | * These events are generated when hardware detects a corrected or | |
17 | * uncorrected event, and are meant to replace the current API used to | |
18 | * report errors in both the EDAC and MCE subsystems. | |
19 | * | |
20 | * FIXME: Add events for handling memory errors originated from the | |
21 | * MCE subsystem. | |
22 | */ | |
23 | ||
24 | /* | |
25 | * Hardware-independent Memory Controller specific events | |
26 | */ | |
27 | ||
28 | /* | |
29 | * Default error mechanisms for Memory Controller errors (CE and UE) | |
30 | */ | |
31 | TRACE_EVENT(mc_event, | |
32 | ||
33 | TP_PROTO(const unsigned int err_type, | |
34 | const char *error_msg, | |
35 | const char *label, | |
36 | const int error_count, | |
37 | const u8 mc_index, | |
38 | const s8 top_layer, | |
39 | const s8 mid_layer, | |
40 | const s8 low_layer, | |
41 | unsigned long address, | |
42 | const u8 grain_bits, | |
43 | unsigned long syndrome, | |
44 | const char *driver_detail), | |
45 | ||
46 | TP_ARGS(err_type, error_msg, label, error_count, mc_index, | |
47 | top_layer, mid_layer, low_layer, address, grain_bits, | |
48 | syndrome, driver_detail), | |
49 | ||
50 | TP_STRUCT__entry( | |
51 | __field( unsigned int, error_type ) | |
52 | __string( msg, error_msg ) | |
53 | __string( label, label ) | |
54 | __field( u16, error_count ) | |
55 | __field( u8, mc_index ) | |
56 | __field( s8, top_layer ) | |
57 | __field( s8, middle_layer ) | |
58 | __field( s8, lower_layer ) | |
59 | __field( long, address ) | |
60 | __field( u8, grain_bits ) | |
61 | __field( long, syndrome ) | |
62 | __string( driver_detail, driver_detail ) | |
63 | ), | |
64 | ||
65 | TP_fast_assign( | |
66 | __entry->error_type = err_type; | |
67 | __assign_str(msg, error_msg); | |
68 | __assign_str(label, label); | |
69 | __entry->error_count = error_count; | |
70 | __entry->mc_index = mc_index; | |
71 | __entry->top_layer = top_layer; | |
72 | __entry->middle_layer = mid_layer; | |
73 | __entry->lower_layer = low_layer; | |
74 | __entry->address = address; | |
75 | __entry->grain_bits = grain_bits; | |
76 | __entry->syndrome = syndrome; | |
77 | __assign_str(driver_detail, driver_detail); | |
78 | ), | |
79 | ||
80 | TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", | |
81 | __entry->error_count, | |
8dd93d45 | 82 | mc_event_error_type(__entry->error_type), |
53f2d028 MCC |
83 | __entry->error_count > 1 ? "s" : "", |
84 | ((char *)__get_str(msg))[0] ? " " : "", | |
85 | __get_str(msg), | |
86 | __get_str(label), | |
87 | __entry->mc_index, | |
88 | __entry->top_layer, | |
89 | __entry->middle_layer, | |
90 | __entry->lower_layer, | |
91 | __entry->address, | |
92 | 1 << __entry->grain_bits, | |
93 | __entry->syndrome, | |
94 | ((char *)__get_str(driver_detail))[0] ? " " : "", | |
95 | __get_str(driver_detail)) | |
96 | ); | |
97 | ||
0a2409aa CG |
98 | /* |
99 | * PCIe AER Trace event | |
100 | * | |
101 | * These events are generated when hardware detects a corrected or | |
102 | * uncorrected event on a PCIe device. The event report has | |
103 | * the following structure: | |
104 | * | |
105 | * char * dev_name - The name of the slot where the device resides | |
106 | * ([domain:]bus:device.function). | |
107 | * u32 status - Either the correctable or uncorrectable register | |
108 | * indicating what error or errors have been seen | |
109 | * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED | |
110 | */ | |
111 | ||
/*
 * Bit-to-name table for the correctable error status value reported by
 * aer_event, as a comma-separated list of {mask, name} initialisers
 * suitable for __print_flags().
 *
 * Bit 8 is named "REPLAY_NUM Rollover" in the PCI Express specification
 * (the earlier "RELAY_NUM" spelling was a typo).  Bits 14 and 15 are
 * listed so that those errors are decoded by name as well.
 */
#define aer_correctable_errors			\
	{BIT(0),	"Receiver Error"},		\
	{BIT(6),	"Bad TLP"},			\
	{BIT(7),	"Bad DLLP"},			\
	{BIT(8),	"REPLAY_NUM Rollover"},		\
	{BIT(12),	"Replay Timer Timeout"},	\
	{BIT(13),	"Advisory Non-Fatal"},		\
	{BIT(14),	"Corrected Internal Error"},	\
	{BIT(15),	"Header Log Overflow"}
119 | ||
/*
 * Bit-to-name table for the uncorrectable error status value reported by
 * aer_event, as a comma-separated list of {mask, name} initialisers
 * suitable for __print_flags().
 *
 * Bit 5 (Surprise Down) and bits 21-25 are defined by the PCI Express
 * specification and are listed here so that those errors are decoded by
 * name as well.
 */
#define aer_uncorrectable_errors		\
	{BIT(4),	"Data Link Protocol"},		\
	{BIT(5),	"Surprise Down"},		\
	{BIT(12),	"Poisoned TLP"},		\
	{BIT(13),	"Flow Control Protocol"},	\
	{BIT(14),	"Completion Timeout"},		\
	{BIT(15),	"Completer Abort"},		\
	{BIT(16),	"Unexpected Completion"},	\
	{BIT(17),	"Receiver Overflow"},		\
	{BIT(18),	"Malformed TLP"},		\
	{BIT(19),	"ECRC"},			\
	{BIT(20),	"Unsupported Request"},		\
	{BIT(21),	"ACS Violation"},		\
	{BIT(22),	"Uncorrectable Internal Error"},\
	{BIT(23),	"MC Blocked TLP"},		\
	{BIT(24),	"AtomicOp Egress Blocked"},	\
	{BIT(25),	"TLP Prefix Blocked"}
131 | ||
132 | TRACE_EVENT(aer_event, | |
133 | TP_PROTO(const char *dev_name, | |
134 | const u32 status, | |
135 | const u8 severity), | |
136 | ||
137 | TP_ARGS(dev_name, status, severity), | |
138 | ||
139 | TP_STRUCT__entry( | |
140 | __string( dev_name, dev_name ) | |
141 | __field( u32, status ) | |
142 | __field( u8, severity ) | |
143 | ), | |
144 | ||
145 | TP_fast_assign( | |
146 | __assign_str(dev_name, dev_name); | |
147 | __entry->status = status; | |
148 | __entry->severity = severity; | |
149 | ), | |
150 | ||
151 | TP_printk("%s PCIe Bus Error: severity=%s, %s\n", | |
152 | __get_str(dev_name), | |
153 | __entry->severity == AER_CORRECTABLE ? "Corrected" : | |
154 | __entry->severity == AER_FATAL ? | |
155 | "Fatal" : "Uncorrected, non-fatal", | |
156 | __entry->severity == AER_CORRECTABLE ? | |
157 | __print_flags(__entry->status, "|", aer_correctable_errors) : | |
158 | __print_flags(__entry->status, "|", aer_uncorrectable_errors)) | |
159 | ); | |
160 | ||
53f2d028 MCC |
161 | #endif /* _TRACE_HW_EVENT_MC_H */ |
162 | ||
163 | /* This part must be outside protection */ | |
164 | #include <trace/define_trace.h> |