diff options
author | Kay Sievers <kay@vrfy.org> | 2012-05-02 20:29:41 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2012-05-07 20:03:27 -0400 |
commit | e11fea92e13fb91c50bacca799a6131c81929986 (patch) | |
tree | 3b9204f4eccc6b488b843d6d00e65dc982c50160 /kernel/printk.c | |
parent | 7ff9554bb578ba02166071d2d487b7fc7d860d62 (diff) |
kmsg: export printk records to the /dev/kmsg interface
Support for multiple concurrent readers of /dev/kmsg, with read(),
seek(), poll() support. Output of message sequence numbers, to allow
userspace log consumers to reliably reconnect and reconstruct their
state at any given time. After open("/dev/kmsg"), read() always
returns *all* buffered records. If only future messages should be
read, SEEK_END can be used. In case records get overwritten while
/dev/kmsg is held open, or records get overwritten faster than they
are read, the next read() will return -EPIPE and the current reading
position gets updated to the next available record. The passed
sequence numbers allow the log consumer to calculate the amount of
lost messages.
[root@mop ~]# cat /dev/kmsg
5,0,0;Linux version 3.4.0-rc1+ (kay@mop) (gcc version 4.7.0 20120315 ...
6,159,423091;ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
7,160,424069;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored)
SUBSYSTEM=acpi
DEVICE=+acpi:PNP0A03:00
6,339,5140900;NET: Registered protocol family 10
30,340,5690716;udevd[80]: starting version 181
6,341,6081421;FDC 0 is a S82078B
6,345,6154686;microcode: CPU0 sig=0x623, pf=0x0, revision=0x0
7,346,6156968;sr 1:0:0:0: Attached scsi CD-ROM sr0
SUBSYSTEM=scsi
DEVICE=+scsi:1:0:0:0
6,347,6289375;microcode: CPU1 sig=0x623, pf=0x0, revision=0x0
Cc: Karel Zak <kzak@redhat.com>
Tested-by: William Douglas <william.douglas@intel.com>
Signed-off-by: Kay Sievers <kay@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel/printk.c')
-rw-r--r-- | kernel/printk.c | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/kernel/printk.c b/kernel/printk.c index 74357329550f..1ccc6d986cb3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/cpu.h> | 41 | #include <linux/cpu.h> |
42 | #include <linux/notifier.h> | 42 | #include <linux/notifier.h> |
43 | #include <linux/rculist.h> | 43 | #include <linux/rculist.h> |
44 | #include <linux/poll.h> | ||
44 | 45 | ||
45 | #include <asm/uaccess.h> | 46 | #include <asm/uaccess.h> |
46 | 47 | ||
@@ -149,6 +150,48 @@ static int console_may_schedule; | |||
149 | * length of the message text is stored in the header, the stored message | 150 | * length of the message text is stored in the header, the stored message |
150 | * is not terminated. | 151 | * is not terminated. |
151 | * | 152 | * |
153 | * Optionally, a message can carry a dictionary of properties (key/value pairs), | ||
154 | * to provide userspace with a machine-readable message context. | ||
155 | * | ||
156 | * Examples for well-defined, commonly used property names are: | ||
157 | * DEVICE=b12:8 device identifier | ||
158 | * b12:8 block dev_t | ||
159 | * c127:3 char dev_t | ||
160 | * n8 netdev ifindex | ||
161 | * +sound:card0 subsystem:devname | ||
162 | * SUBSYSTEM=pci driver-core subsystem name | ||
163 | * | ||
164 | * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value | ||
165 | * follows directly after a '=' character. Properties are separated by | ||
166 | * a '\0' character; the last property is not terminated. | ||
167 | * | ||
168 | * Example of a message structure: | ||
169 | * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec | ||
170 | * 0008 34 00 record is 52 bytes long | ||
171 | * 000a 0b 00 text is 11 bytes long | ||
172 | * 000c 1f 00 dictionary is 23 bytes long | ||
173 | * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) | ||
174 | * 0010 69 74 27 73 20 61 20 6c "it's a l" | ||
175 | * 69 6e 65 "ine" | ||
176 | * 001b 44 45 56 49 43 "DEVIC" | ||
177 | * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" | ||
178 | * 52 49 56 45 52 3d 62 75 "RIVER=bu" | ||
179 | * 67 "g" | ||
180 | * 0032 00 00 00 padding to next message header | ||
181 | * | ||
182 | * The 'struct log' buffer header must never be directly exported to | ||
183 | * userspace, it is a kernel-private implementation detail that might | ||
184 | * need to be changed in the future, when the requirements change. | ||
185 | * | ||
186 | * /dev/kmsg exports the structured data in the following line format: | ||
187 | * "level,sequnum,timestamp;<message text>\n" | ||
188 | * | ||
189 | * The optional key/value pairs are attached as continuation lines starting | ||
190 | * with a space character and terminated by a newline. All possible | ||
191 | * non-printable characters are escaped in the "\xff" notation. | ||
192 | * | ||
193 | * Users of the export format should ignore possible additional values | ||
194 | * separated by ',', and find the message after the ';' character. | ||
152 | */ | 195 | */ |
153 | 196 | ||
154 | struct log { | 197 | struct log { |
@@ -297,6 +340,276 @@ static void log_store(int facility, int level, | |||
297 | log_next_seq++; | 340 | log_next_seq++; |
298 | } | 341 | } |
299 | 342 | ||
343 | /* /dev/kmsg - userspace message inject/listen interface */ | ||
/* Per-open reader state, stored in file->private_data by devkmsg_open(). */
struct devkmsg_user {
	u64 seq;		/* sequence number of the next record to read */
	u32 idx;		/* log buffer index of the next record to read */
	struct mutex lock;	/* held by devkmsg_read() for the whole read */
	char buf[8192];		/* record is formatted here before copy_to_user() */
};
350 | |||
351 | static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, | ||
352 | unsigned long count, loff_t pos) | ||
353 | { | ||
354 | char *buf, *line; | ||
355 | int i; | ||
356 | int level = default_message_loglevel; | ||
357 | int facility = 1; /* LOG_USER */ | ||
358 | size_t len = iov_length(iv, count); | ||
359 | ssize_t ret = len; | ||
360 | |||
361 | if (len > LOG_LINE_MAX) | ||
362 | return -EINVAL; | ||
363 | buf = kmalloc(len+1, GFP_KERNEL); | ||
364 | if (buf == NULL) | ||
365 | return -ENOMEM; | ||
366 | |||
367 | line = buf; | ||
368 | for (i = 0; i < count; i++) { | ||
369 | if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) | ||
370 | goto out; | ||
371 | line += iv[i].iov_len; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace | ||
376 | * the decimal value represents 32bit, the lower 3 bit are the log | ||
377 | * level, the rest are the log facility. | ||
378 | * | ||
379 | * If no prefix or no userspace facility is specified, we | ||
380 | * enforce LOG_USER, to be able to reliably distinguish | ||
381 | * kernel-generated messages from userspace-injected ones. | ||
382 | */ | ||
383 | line = buf; | ||
384 | if (line[0] == '<') { | ||
385 | char *endp = NULL; | ||
386 | |||
387 | i = simple_strtoul(line+1, &endp, 10); | ||
388 | if (endp && endp[0] == '>') { | ||
389 | level = i & 7; | ||
390 | if (i >> 3) | ||
391 | facility = i >> 3; | ||
392 | endp++; | ||
393 | len -= endp - line; | ||
394 | line = endp; | ||
395 | } | ||
396 | } | ||
397 | line[len] = '\0'; | ||
398 | |||
399 | printk_emit(facility, level, NULL, 0, "%s", line); | ||
400 | out: | ||
401 | kfree(buf); | ||
402 | return ret; | ||
403 | } | ||
404 | |||
405 | static ssize_t devkmsg_read(struct file *file, char __user *buf, | ||
406 | size_t count, loff_t *ppos) | ||
407 | { | ||
408 | struct devkmsg_user *user = file->private_data; | ||
409 | struct log *msg; | ||
410 | size_t i; | ||
411 | size_t len; | ||
412 | ssize_t ret; | ||
413 | |||
414 | if (!user) | ||
415 | return -EBADF; | ||
416 | |||
417 | mutex_lock(&user->lock); | ||
418 | raw_spin_lock(&logbuf_lock); | ||
419 | while (user->seq == log_next_seq) { | ||
420 | if (file->f_flags & O_NONBLOCK) { | ||
421 | ret = -EAGAIN; | ||
422 | raw_spin_unlock(&logbuf_lock); | ||
423 | goto out; | ||
424 | } | ||
425 | |||
426 | raw_spin_unlock(&logbuf_lock); | ||
427 | ret = wait_event_interruptible(log_wait, | ||
428 | user->seq != log_next_seq); | ||
429 | if (ret) | ||
430 | goto out; | ||
431 | raw_spin_lock(&logbuf_lock); | ||
432 | } | ||
433 | |||
434 | if (user->seq < log_first_seq) { | ||
435 | /* our last seen message is gone, return error and reset */ | ||
436 | user->idx = log_first_idx; | ||
437 | user->seq = log_first_seq; | ||
438 | ret = -EPIPE; | ||
439 | raw_spin_unlock(&logbuf_lock); | ||
440 | goto out; | ||
441 | } | ||
442 | |||
443 | msg = log_from_idx(user->idx); | ||
444 | len = sprintf(user->buf, "%u,%llu,%llu;", | ||
445 | msg->level, user->seq, msg->ts_nsec / 1000); | ||
446 | |||
447 | /* escape non-printable characters */ | ||
448 | for (i = 0; i < msg->text_len; i++) { | ||
449 | char c = log_text(msg)[i]; | ||
450 | |||
451 | if (c < ' ' || c >= 128) | ||
452 | len += sprintf(user->buf + len, "\\x%02x", c); | ||
453 | else | ||
454 | user->buf[len++] = c; | ||
455 | } | ||
456 | user->buf[len++] = '\n'; | ||
457 | |||
458 | if (msg->dict_len) { | ||
459 | bool line = true; | ||
460 | |||
461 | for (i = 0; i < msg->dict_len; i++) { | ||
462 | char c = log_dict(msg)[i]; | ||
463 | |||
464 | if (line) { | ||
465 | user->buf[len++] = ' '; | ||
466 | line = false; | ||
467 | } | ||
468 | |||
469 | if (c == '\0') { | ||
470 | user->buf[len++] = '\n'; | ||
471 | line = true; | ||
472 | continue; | ||
473 | } | ||
474 | |||
475 | if (c < ' ' || c >= 128) { | ||
476 | len += sprintf(user->buf + len, "\\x%02x", c); | ||
477 | continue; | ||
478 | } | ||
479 | |||
480 | user->buf[len++] = c; | ||
481 | } | ||
482 | user->buf[len++] = '\n'; | ||
483 | } | ||
484 | |||
485 | user->idx = log_next(user->idx); | ||
486 | user->seq++; | ||
487 | raw_spin_unlock(&logbuf_lock); | ||
488 | |||
489 | if (len > count) { | ||
490 | ret = -EINVAL; | ||
491 | goto out; | ||
492 | } | ||
493 | |||
494 | if (copy_to_user(buf, user->buf, len)) { | ||
495 | ret = -EFAULT; | ||
496 | goto out; | ||
497 | } | ||
498 | ret = len; | ||
499 | out: | ||
500 | mutex_unlock(&user->lock); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) | ||
505 | { | ||
506 | struct devkmsg_user *user = file->private_data; | ||
507 | loff_t ret = 0; | ||
508 | |||
509 | if (!user) | ||
510 | return -EBADF; | ||
511 | if (offset) | ||
512 | return -ESPIPE; | ||
513 | |||
514 | raw_spin_lock(&logbuf_lock); | ||
515 | switch (whence) { | ||
516 | case SEEK_SET: | ||
517 | /* the first record */ | ||
518 | user->idx = log_first_idx; | ||
519 | user->seq = log_first_seq; | ||
520 | break; | ||
521 | case SEEK_DATA: | ||
522 | /* | ||
523 | * The first record after the last SYSLOG_ACTION_CLEAR, | ||
524 | * like issued by 'dmesg -c'. Reading /dev/kmsg itself | ||
525 | * changes no global state, and does not clear anything. | ||
526 | */ | ||
527 | user->idx = clear_idx; | ||
528 | user->seq = clear_seq; | ||
529 | break; | ||
530 | case SEEK_END: | ||
531 | /* after the last record */ | ||
532 | user->idx = log_next_idx; | ||
533 | user->seq = log_next_seq; | ||
534 | break; | ||
535 | default: | ||
536 | ret = -EINVAL; | ||
537 | } | ||
538 | raw_spin_unlock(&logbuf_lock); | ||
539 | return ret; | ||
540 | } | ||
541 | |||
542 | static unsigned int devkmsg_poll(struct file *file, poll_table *wait) | ||
543 | { | ||
544 | struct devkmsg_user *user = file->private_data; | ||
545 | int ret = 0; | ||
546 | |||
547 | if (!user) | ||
548 | return POLLERR|POLLNVAL; | ||
549 | |||
550 | poll_wait(file, &log_wait, wait); | ||
551 | |||
552 | raw_spin_lock(&logbuf_lock); | ||
553 | if (user->seq < log_next_seq) { | ||
554 | /* return error when data has vanished underneath us */ | ||
555 | if (user->seq < log_first_seq) | ||
556 | ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; | ||
557 | ret = POLLIN|POLLRDNORM; | ||
558 | } | ||
559 | raw_spin_unlock(&logbuf_lock); | ||
560 | |||
561 | return ret; | ||
562 | } | ||
563 | |||
564 | static int devkmsg_open(struct inode *inode, struct file *file) | ||
565 | { | ||
566 | struct devkmsg_user *user; | ||
567 | int err; | ||
568 | |||
569 | /* write-only does not need any file context */ | ||
570 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | ||
571 | return 0; | ||
572 | |||
573 | err = security_syslog(SYSLOG_ACTION_READ_ALL); | ||
574 | if (err) | ||
575 | return err; | ||
576 | |||
577 | user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); | ||
578 | if (!user) | ||
579 | return -ENOMEM; | ||
580 | |||
581 | mutex_init(&user->lock); | ||
582 | |||
583 | raw_spin_lock(&logbuf_lock); | ||
584 | user->idx = log_first_idx; | ||
585 | user->seq = log_first_seq; | ||
586 | raw_spin_unlock(&logbuf_lock); | ||
587 | |||
588 | file->private_data = user; | ||
589 | return 0; | ||
590 | } | ||
591 | |||
592 | static int devkmsg_release(struct inode *inode, struct file *file) | ||
593 | { | ||
594 | struct devkmsg_user *user = file->private_data; | ||
595 | |||
596 | if (!user) | ||
597 | return 0; | ||
598 | |||
599 | mutex_destroy(&user->lock); | ||
600 | kfree(user); | ||
601 | return 0; | ||
602 | } | ||
603 | |||
604 | const struct file_operations kmsg_fops = { | ||
605 | .open = devkmsg_open, | ||
606 | .read = devkmsg_read, | ||
607 | .aio_write = devkmsg_writev, | ||
608 | .llseek = devkmsg_llseek, | ||
609 | .poll = devkmsg_poll, | ||
610 | .release = devkmsg_release, | ||
611 | }; | ||
612 | |||
300 | #ifdef CONFIG_KEXEC | 613 | #ifdef CONFIG_KEXEC |
301 | /* | 614 | /* |
302 | * This appends the listed symbols to /proc/vmcoreinfo | 615 | * This appends the listed symbols to /proc/vmcoreinfo |