diff options
author | Anton Blanchard <anton@samba.org> | 2011-01-11 14:50:51 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2011-01-20 22:08:38 -0500 |
commit | d368514c3097a48a109b6ba64e12047106c7473d (patch) | |
tree | bdd892bec071fc042f70c1875642175cbcc782cb /arch/powerpc | |
parent | d47d1d8af52e37bcf9059dd86878474e5ccc9c2a (diff) |
powerpc: Fix corruption when grabbing FWNMI data
The FWNMI code uses a global buffer without any locks to read the RTAS error
information. If two CPUs take a machine check at once then we will corrupt
this buffer.
Since most FWNMI rtas messages are not of the extended type, we can create a
64bit percpu buffer and use it where possible. If we do receive an extended
RTAS log then we fall back to the old behaviour of using the global buffer.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index d194150cf342..c55d7ad9c648 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c | |||
@@ -54,7 +54,8 @@ | |||
54 | static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; | 54 | static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; |
55 | static DEFINE_SPINLOCK(ras_log_buf_lock); | 55 | static DEFINE_SPINLOCK(ras_log_buf_lock); |
56 | 56 | ||
57 | static char mce_data_buf[RTAS_ERROR_LOG_MAX]; | 57 | static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; |
58 | static DEFINE_PER_CPU(__u64, mce_data_buf); | ||
58 | 59 | ||
59 | static int ras_get_sensor_state_token; | 60 | static int ras_get_sensor_state_token; |
60 | static int ras_check_exception_token; | 61 | static int ras_check_exception_token; |
@@ -196,12 +197,24 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) | |||
196 | return IRQ_HANDLED; | 197 | return IRQ_HANDLED; |
197 | } | 198 | } |
198 | 199 | ||
199 | /* Get the error information for errors coming through the | 200 | /* |
201 | * Some versions of FWNMI place the buffer inside the 4kB page starting at | ||
202 | * 0x7000. Other versions place it inside the rtas buffer. We check both. | ||
203 | */ | ||
204 | #define VALID_FWNMI_BUFFER(A) \ | ||
205 | ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ | ||
206 | (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) | ||
207 | |||
208 | /* | ||
209 | * Get the error information for errors coming through the | ||
200 | * FWNMI vectors. The pt_regs' r3 will be updated to reflect | 210 | * FWNMI vectors. The pt_regs' r3 will be updated to reflect |
201 | * the actual r3 if possible, and a ptr to the error log entry | 211 | * the actual r3 if possible, and a ptr to the error log entry |
202 | * will be returned if found. | 212 | * will be returned if found. |
203 | * | 213 | * |
204 | * The mce_data_buf does not have any locks or protection around it, | 214 | * If the RTAS error is not of the extended type, then we put it in a per |
215 | * cpu 64bit buffer. If it is the extended type we use global_mce_data_buf. | ||
216 | * | ||
217 | * The global_mce_data_buf does not have any locks or protection around it, | ||
205 | * if a second machine check comes in, or a system reset is done | 218 | * if a second machine check comes in, or a system reset is done |
206 | * before we have logged the error, then we will get corruption in the | 219 | * before we have logged the error, then we will get corruption in the |
207 | * error log. This is preferable over holding off on calling | 220 | * error log. This is preferable over holding off on calling |
@@ -210,20 +223,31 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) | |||
210 | */ | 223 | */ |
211 | static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) | 224 | static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) |
212 | { | 225 | { |
213 | unsigned long errdata = regs->gpr[3]; | ||
214 | struct rtas_error_log *errhdr = NULL; | ||
215 | unsigned long *savep; | 226 | unsigned long *savep; |
227 | struct rtas_error_log *h, *errhdr = NULL; | ||
216 | 228 | ||
217 | if ((errdata >= 0x7000 && errdata < 0x7fff0) || | 229 | if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { |
218 | (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { | 230 | printk(KERN_ERR "FWNMI: corrupt r3\n"); |
219 | savep = __va(errdata); | 231 | return NULL; |
220 | regs->gpr[3] = savep[0]; /* restore original r3 */ | 232 | } |
221 | memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX); | 233 | |
222 | memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX); | 234 | savep = __va(regs->gpr[3]); |
223 | errhdr = (struct rtas_error_log *)mce_data_buf; | 235 | regs->gpr[3] = savep[0]; /* restore original r3 */ |
236 | |||
237 | /* If it isn't an extended log we can use the per cpu 64bit buffer */ | ||
238 | h = (struct rtas_error_log *)&savep[1]; | ||
239 | if (!h->extended) { | ||
240 | memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); | ||
241 | errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); | ||
224 | } else { | 242 | } else { |
225 | printk("FWNMI: corrupt r3\n"); | 243 | int len; |
244 | |||
245 | len = max_t(int, 8+h->extended_log_length, RTAS_ERROR_LOG_MAX); | ||
246 | memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); | ||
247 | memcpy(global_mce_data_buf, h, len); | ||
248 | errhdr = (struct rtas_error_log *)global_mce_data_buf; | ||
226 | } | 249 | } |
250 | |||
227 | return errhdr; | 251 | return errhdr; |
228 | } | 252 | } |
229 | 253 | ||
@@ -235,7 +259,7 @@ static void fwnmi_release_errinfo(void) | |||
235 | { | 259 | { |
236 | int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); | 260 | int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); |
237 | if (ret != 0) | 261 | if (ret != 0) |
238 | printk("FWNMI: nmi-interlock failed: %d\n", ret); | 262 | printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); |
239 | } | 263 | } |
240 | 264 | ||
241 | int pSeries_system_reset_exception(struct pt_regs *regs) | 265 | int pSeries_system_reset_exception(struct pt_regs *regs) |