 arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 121 ++++++----------------------
 1 file changed, 27 insertions(+), 94 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..7f85b76f43bc 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
-
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
+	unsigned int entry;
 
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
-
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return NOTIFY_OK;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+	mutex_lock(&mce_chrdev_read_mutex);
+
+	entry = mcelog.next;
+
+	/*
+	 * When the buffer fills up discard new entries. Assume that the
+	 * earlier errors are the more interesting ones:
+	 */
+	if (entry >= MCE_LOG_LEN) {
+		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+		goto unlock;
 	}
+
+	mcelog.next = entry + 1;
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
 
+unlock:
+	mutex_unlock(&mce_chrdev_read_mutex);
+
 	return NOTIFY_OK;
 }
 
@@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
 static int mce_apei_read_done;
 
 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
+	unsigned next;
 	int i, err;
 
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	if (!mce_apei_read_done) {
@@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}
 
-	next = mce_log_get_idx_check(mcelog.next);
-
 	/* Only supports full reads right now */
 	err = -EINVAL;
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
 		goto out;
 
+	next = mcelog.next;
 	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
 
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
+	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
+		err |= copy_to_user(buf, m, sizeof(*m));
+		buf += sizeof(*m);
 	}
 
+	memset(mcelog.entry, 0, next * sizeof(struct mce));
+	mcelog.next = 0;
+
 	if (err)
 		err = -EFAULT;
 
 out:
 	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }
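
The net effect of the patch: both dev_mce_log() (the writer side) and mce_chrdev_read() (the reader side) now run entirely under mce_chrdev_read_mutex, so the old cmpxchg()/wmb()/smp_rmb() dance, the leftover-entry skipping, and the per-CPU TSC collection all go away. Below is a minimal userspace model of that post-patch scheme, for illustration only: the names (log_record, read_records), the int payload, and the pthread mutex standing in for mce_chrdev_read_mutex are all assumptions, not the kernel code.

	/* Userspace sketch of a mutex-serialized, fixed-size log. */
	#include <pthread.h>
	#include <stdio.h>
	#include <string.h>

	#define LOG_LEN 32

	struct record { int finished; int payload; };

	static struct record log_entry[LOG_LEN];
	static unsigned int log_next;
	static int log_overflow;			/* models MCE_OVERFLOW */
	static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

	/* Writer: append one record, or flag overflow when the buffer is full. */
	static void log_record(int payload)
	{
		pthread_mutex_lock(&log_mutex);
		if (log_next >= LOG_LEN) {
			log_overflow = 1;
		} else {
			log_entry[log_next].payload = payload;
			log_entry[log_next].finished = 1;
			log_next++;
		}
		pthread_mutex_unlock(&log_mutex);
	}

	/* Reader: drain everything logged so far, then reset the buffer.
	 * 'out' must have room for LOG_LEN entries (full reads only). */
	static unsigned int read_records(int *out)
	{
		unsigned int i, n;

		pthread_mutex_lock(&log_mutex);
		n = log_next;
		for (i = 0; i < n; i++)
			out[i] = log_entry[i].payload;	/* copy_to_user() in the kernel */
		memset(log_entry, 0, n * sizeof(log_entry[0]));
		log_next = 0;
		log_overflow = 0;
		pthread_mutex_unlock(&log_mutex);

		return n;
	}

	int main(void)
	{
		int buf[LOG_LEN];
		unsigned int i, n;

		log_record(1);
		log_record(2);
		n = read_records(buf);
		for (i = 0; i < n; i++)
			printf("record %u: %d\n", i, buf[i]);
		return 0;
	}

Because writer and reader hold the same mutex, plain loads and stores of log_next (mcelog.next in the kernel) are sufficient; the finished flag survives only as a field the reader no longer has to poll on.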