-rw-r--r--  arch/x86/kernel/cpu/mcheck/dev-mcelog.c  |  121
1 file changed, 27 insertions(+), 94 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..7f85b76f43bc 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
-
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
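For context, the macro removed above was a lockdep-annotated acquire load of mcelog.next: it warned unless the caller held rcu_read_lock_sched() or mce_chrdev_read_mutex. With the hunks below putting both the logger and the reader under that mutex, a plain read of the index is enough. A rough userspace analog of what the macro did, using C11 atomics and assert() in place of smp_load_acquire() and RCU_LOCKDEP_WARN() (all names and types here are illustrative, not the kernel API):

#include <assert.h>
#include <stdatomic.h>

/* Illustrative stand-ins, not the kernel's definitions. */
static _Atomic unsigned int log_next;   /* plays the role of mcelog.next */
static int read_mutex_held;             /* crude substitute for lockdep state */

/*
 * Rough analog of the removed mce_log_get_idx_check(): an acquire load of
 * the index plus a check that the caller is in a context allowed to use it.
 * Once writer and reader both take the same mutex, this machinery has
 * nothing left to enforce.
 */
static unsigned int log_get_idx_check(void)
{
	assert(read_mutex_held && "suspicious log_get_idx_check() usage");
	return atomic_load_explicit(&log_next, memory_order_acquire);
}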
@@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
+	unsigned int entry;
 
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
-
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return NOTIFY_OK;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+	mutex_lock(&mce_chrdev_read_mutex);
+
+	entry = mcelog.next;
+
+	/*
+	 * When the buffer fills up discard new entries. Assume that the
+	 * earlier errors are the more interesting ones:
+	 */
+	if (entry >= MCE_LOG_LEN) {
+		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+		goto unlock;
 	}
+
+	mcelog.next = entry + 1;
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
 
+unlock:
+	mutex_unlock(&mce_chrdev_read_mutex);
+
 	return NOTIFY_OK;
 }
 
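The rewritten dev_mce_log() drops the cmpxchg()/wmb() retry loop and simply serializes against the reader with mce_chrdev_read_mutex. A minimal userspace sketch of the same pattern, with a pthread mutex standing in for the kernel mutex and a condition variable for wake_up_interruptible() (buffer size, struct layout and all names are made up for illustration):

#include <pthread.h>
#include <string.h>

#define LOG_LEN 32                      /* stand-in for MCE_LOG_LEN */

struct record { int finished; char payload[56]; }; /* stand-in for struct mce */

static struct {
	unsigned int next;              /* first free slot */
	int overflow;                   /* stand-in for the MCE_OVERFLOW flag */
	struct record entry[LOG_LEN];
} logbuf;

static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; /* ~ mce_chrdev_read_mutex */
static pthread_cond_t log_wait = PTHREAD_COND_INITIALIZER;    /* ~ mce_chrdev_wait */

/* Analog of the new dev_mce_log(): everything happens under the mutex. */
static void log_record(const struct record *rec)
{
	unsigned int entry;

	pthread_mutex_lock(&log_mutex);

	entry = logbuf.next;

	/* When the buffer fills up, discard new entries and mark the overflow. */
	if (entry >= LOG_LEN) {
		logbuf.overflow = 1;
		goto unlock;
	}

	logbuf.next = entry + 1;

	memcpy(&logbuf.entry[entry], rec, sizeof(*rec));
	logbuf.entry[entry].finished = 1;

	/* Wake any reader blocked waiting for data (~ wake_up_interruptible()). */
	pthread_cond_signal(&log_wait);

unlock:
	pthread_mutex_unlock(&log_mutex);
}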
@@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
 static int mce_apei_read_done;
 
 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			    size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
+	unsigned next;
 	int i, err;
 
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	if (!mce_apei_read_done) {
@@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}
 
-	next = mce_log_get_idx_check(mcelog.next);
-
 	/* Only supports full reads right now */
 	err = -EINVAL;
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
 		goto out;
 
+	next = mcelog.next;
 	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
 
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
+	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
+		err |= copy_to_user(buf, m, sizeof(*m));
+		buf += sizeof(*m);
 	}
 
+	memset(mcelog.entry, 0, next * sizeof(struct mce));
+	mcelog.next = 0;
+
 	if (err)
 		err = -EFAULT;
 
 out:
 	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }
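With the writer holding mce_chrdev_read_mutex, every slot below mcelog.next is complete by the time the reader takes the same mutex, so the polling on ->finished, the per-CPU TSC snapshot and the cmpxchg() drain loop collapse into one copy-and-clear pass. A sketch of that read path, continuing the userspace analog above (it reuses struct record, LOG_LEN, logbuf and log_mutex from that snippet; a plain memcpy() stands in for copy_to_user(), and the names remain illustrative):

#include <errno.h>

/*
 * Analog of the simplified mce_chrdev_read(): drain the whole buffer under
 * the same mutex the logger takes, then reset it.  Returns the number of
 * records copied, or -EINVAL for a short read.
 */
static long read_records(struct record *out, unsigned int max_records)
{
	unsigned int next, i;

	if (max_records < LOG_LEN)      /* only full reads are supported */
		return -EINVAL;

	pthread_mutex_lock(&log_mutex);

	next = logbuf.next;
	for (i = 0; i < next; i++)
		memcpy(&out[i], &logbuf.entry[i], sizeof(out[i]));

	/* Everything below 'next' has been consumed: clear it and start over. */
	memset(logbuf.entry, 0, next * sizeof(struct record));
	logbuf.next = 0;

	pthread_mutex_unlock(&log_mutex);

	return next;
}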
