Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup.c              13
-rw-r--r--   kernel/events/core.c         10
-rw-r--r--   kernel/exit.c                19
-rw-r--r--   kernel/panic.c                6
-rw-r--r--   kernel/pid_namespace.c       20
-rw-r--r--   kernel/printk.c             532
-rw-r--r--   kernel/rcutree.c             16
-rw-r--r--   kernel/rcutree.h             14
-rw-r--r--   kernel/rcutree_plugin.h     165
-rw-r--r--   kernel/sys.c                  6
-rw-r--r--   kernel/time/tick-sched.c      7
-rw-r--r--   kernel/trace/trace.c          2
-rw-r--r--   kernel/watchdog.c            19
13 files changed, 610 insertions, 219 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 72fcd3069a90..2097684cf194 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -255,12 +255,17 @@ int cgroup_lock_is_held(void) | |||
| 255 | 255 | ||
| 256 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); | 256 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); |
| 257 | 257 | ||
| 258 | static int css_unbias_refcnt(int refcnt) | ||
| 259 | { | ||
| 260 | return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS; | ||
| 261 | } | ||
| 262 | |||
| 258 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | 263 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ |
| 259 | static int css_refcnt(struct cgroup_subsys_state *css) | 264 | static int css_refcnt(struct cgroup_subsys_state *css) |
| 260 | { | 265 | { |
| 261 | int v = atomic_read(&css->refcnt); | 266 | int v = atomic_read(&css->refcnt); |
| 262 | 267 | ||
| 263 | return v >= 0 ? v : v - CSS_DEACT_BIAS; | 268 | return css_unbias_refcnt(v); |
| 264 | } | 269 | } |
| 265 | 270 | ||
| 266 | /* convenient tests for these bits */ | 271 | /* convenient tests for these bits */ |
| @@ -4982,10 +4987,12 @@ EXPORT_SYMBOL_GPL(__css_tryget); | |||
| 4982 | void __css_put(struct cgroup_subsys_state *css) | 4987 | void __css_put(struct cgroup_subsys_state *css) |
| 4983 | { | 4988 | { |
| 4984 | struct cgroup *cgrp = css->cgroup; | 4989 | struct cgroup *cgrp = css->cgroup; |
| 4990 | int v; | ||
| 4985 | 4991 | ||
| 4986 | rcu_read_lock(); | 4992 | rcu_read_lock(); |
| 4987 | atomic_dec(&css->refcnt); | 4993 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); |
| 4988 | switch (css_refcnt(css)) { | 4994 | |
| 4995 | switch (v) { | ||
| 4989 | case 1: | 4996 | case 1: |
| 4990 | if (notify_on_release(cgrp)) { | 4997 | if (notify_on_release(cgrp)) { |
| 4991 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 4998 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
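The __css_put() change above folds the decrement and the bias correction into one step: atomic_dec_return() yields the new count and css_unbias_refcnt() strips the large negative CSS_DEACT_BIAS that marks a deactivated css. A minimal userspace sketch of the same biased-refcount idea, using stdatomic with made-up names (struct obj, DEACT_BIAS), not the kernel types:

#include <limits.h>
#include <stdatomic.h>
#include <stdio.h>

#define DEACT_BIAS INT_MIN	/* large negative bias marks "deactivated" */

struct obj {
	atomic_int refcnt;
};

static int unbias_refcnt(int refcnt)
{
	return refcnt >= 0 ? refcnt : refcnt - DEACT_BIAS;
}

static void obj_put(struct obj *o)
{
	/* decrement and unbias in one step, as __css_put() now does */
	int v = unbias_refcnt(atomic_fetch_sub(&o->refcnt, 1) - 1);

	if (v == 0)
		printf("last reference gone, object can be freed\n");
}

int main(void)
{
	struct obj o = { .refcnt = 2 };

	obj_put(&o);				/* true count drops to 1 */
	atomic_fetch_add(&o.refcnt, DEACT_BIAS);	/* deactivate */
	obj_put(&o);				/* unbiased count is now 0 */
	return 0;
}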
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85c0154b333..d7d71d6ec972 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -253,9 +253,9 @@ perf_cgroup_match(struct perf_event *event) | |||
| 253 | return !event->cgrp || event->cgrp == cpuctx->cgrp; | 253 | return !event->cgrp || event->cgrp == cpuctx->cgrp; |
| 254 | } | 254 | } |
| 255 | 255 | ||
| 256 | static inline void perf_get_cgroup(struct perf_event *event) | 256 | static inline bool perf_tryget_cgroup(struct perf_event *event) |
| 257 | { | 257 | { |
| 258 | css_get(&event->cgrp->css); | 258 | return css_tryget(&event->cgrp->css); |
| 259 | } | 259 | } |
| 260 | 260 | ||
| 261 | static inline void perf_put_cgroup(struct perf_event *event) | 261 | static inline void perf_put_cgroup(struct perf_event *event) |
| @@ -484,7 +484,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
| 484 | event->cgrp = cgrp; | 484 | event->cgrp = cgrp; |
| 485 | 485 | ||
| 486 | /* must be done before we fput() the file */ | 486 | /* must be done before we fput() the file */ |
| 487 | perf_get_cgroup(event); | 487 | if (!perf_tryget_cgroup(event)) { |
| 488 | event->cgrp = NULL; | ||
| 489 | ret = -ENOENT; | ||
| 490 | goto out; | ||
| 491 | } | ||
| 488 | 492 | ||
| 489 | /* | 493 | /* |
| 490 | * all events in a group must monitor | 494 | * all events in a group must monitor |
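perf_cgroup_connect() now uses a tryget instead of an unconditional get, so a cgroup that is already being torn down is refused and the caller backs off with -ENOENT. A small userspace sketch of the general tryget pattern (obj_tryget() is illustrative, not the css_tryget() implementation):

#include <stdatomic.h>
#include <stdbool.h>

static bool obj_tryget(atomic_int *refcnt)
{
	int v = atomic_load(refcnt);

	do {
		if (v <= 0)		/* dying or dead: refuse new references */
			return false;
	} while (!atomic_compare_exchange_weak(refcnt, &v, v + 1));

	return true;
}

Callers treat a false return the way the hunk above does: undo any partial setup and report that the object no longer exists.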
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..2f59cc334516 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
| @@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead) | |||
| 72 | list_del_rcu(&p->tasks); | 72 | list_del_rcu(&p->tasks); |
| 73 | list_del_init(&p->sibling); | 73 | list_del_init(&p->sibling); |
| 74 | __this_cpu_dec(process_counts); | 74 | __this_cpu_dec(process_counts); |
| 75 | /* | ||
| 76 | * If we are the last child process in a pid namespace to be | ||
| 77 | * reaped, notify the reaper sleeping zap_pid_ns_processes(). | ||
| 78 | */ | ||
| 79 | if (IS_ENABLED(CONFIG_PID_NS)) { | ||
| 80 | struct task_struct *parent = p->real_parent; | ||
| 81 | |||
| 82 | if ((task_active_pid_ns(parent)->child_reaper == parent) && | ||
| 83 | list_empty(&parent->children) && | ||
| 84 | (parent->flags & PF_EXITING)) | ||
| 85 | wake_up_process(parent); | ||
| 86 | } | ||
| 75 | } | 87 | } |
| 76 | list_del_rcu(&p->thread_group); | 88 | list_del_rcu(&p->thread_group); |
| 77 | } | 89 | } |
| @@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk) | |||
| 643 | mm_release(tsk, mm); | 655 | mm_release(tsk, mm); |
| 644 | if (!mm) | 656 | if (!mm) |
| 645 | return; | 657 | return; |
| 658 | sync_mm_rss(mm); | ||
| 646 | /* | 659 | /* |
| 647 | * Serialize with any possible pending coredump. | 660 | * Serialize with any possible pending coredump. |
| 648 | * We must hold mmap_sem around checking core_state | 661 | * We must hold mmap_sem around checking core_state |
| @@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
| 719 | 732 | ||
| 720 | zap_pid_ns_processes(pid_ns); | 733 | zap_pid_ns_processes(pid_ns); |
| 721 | write_lock_irq(&tasklist_lock); | 734 | write_lock_irq(&tasklist_lock); |
| 722 | /* | ||
| 723 | * We can not clear ->child_reaper or leave it alone. | ||
| 724 | * There may by stealth EXIT_DEAD tasks on ->children, | ||
| 725 | * forget_original_parent() must move them somewhere. | ||
| 726 | */ | ||
| 727 | pid_ns->child_reaper = init_pid_ns.child_reaper; | ||
| 728 | } else if (father->signal->has_child_subreaper) { | 735 | } else if (father->signal->has_child_subreaper) { |
| 729 | struct task_struct *reaper; | 736 | struct task_struct *reaper; |
| 730 | 737 | ||
diff --git a/kernel/panic.c b/kernel/panic.c
index 8ed89a175d79..d2a5f4ecc6dd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
| @@ -27,7 +27,7 @@ | |||
| 27 | #define PANIC_TIMER_STEP 100 | 27 | #define PANIC_TIMER_STEP 100 |
| 28 | #define PANIC_BLINK_SPD 18 | 28 | #define PANIC_BLINK_SPD 18 |
| 29 | 29 | ||
| 30 | int panic_on_oops; | 30 | int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; |
| 31 | static unsigned long tainted_mask; | 31 | static unsigned long tainted_mask; |
| 32 | static int pause_on_oops; | 32 | static int pause_on_oops; |
| 33 | static int pause_on_oops_flag; | 33 | static int pause_on_oops_flag; |
| @@ -108,8 +108,6 @@ void panic(const char *fmt, ...) | |||
| 108 | */ | 108 | */ |
| 109 | crash_kexec(NULL); | 109 | crash_kexec(NULL); |
| 110 | 110 | ||
| 111 | kmsg_dump(KMSG_DUMP_PANIC); | ||
| 112 | |||
| 113 | /* | 111 | /* |
| 114 | * Note smp_send_stop is the usual smp shutdown function, which | 112 | * Note smp_send_stop is the usual smp shutdown function, which |
| 115 | * unfortunately means it may not be hardened to work in a panic | 113 | * unfortunately means it may not be hardened to work in a panic |
| @@ -117,6 +115,8 @@ void panic(const char *fmt, ...) | |||
| 117 | */ | 115 | */ |
| 118 | smp_send_stop(); | 116 | smp_send_stop(); |
| 119 | 117 | ||
| 118 | kmsg_dump(KMSG_DUMP_PANIC); | ||
| 119 | |||
| 120 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); | 120 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); |
| 121 | 121 | ||
| 122 | bust_spinlocks(0); | 122 | bust_spinlocks(0); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 16b20e38c4a1..b3c7fd554250 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
| @@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
| 184 | } | 184 | } |
| 185 | read_unlock(&tasklist_lock); | 185 | read_unlock(&tasklist_lock); |
| 186 | 186 | ||
| 187 | /* Firstly reap the EXIT_ZOMBIE children we may have. */ | ||
| 187 | do { | 188 | do { |
| 188 | clear_thread_flag(TIF_SIGPENDING); | 189 | clear_thread_flag(TIF_SIGPENDING); |
| 189 | rc = sys_wait4(-1, NULL, __WALL, NULL); | 190 | rc = sys_wait4(-1, NULL, __WALL, NULL); |
| 190 | } while (rc != -ECHILD); | 191 | } while (rc != -ECHILD); |
| 191 | 192 | ||
| 193 | /* | ||
| 194 | * sys_wait4() above can't reap the TASK_DEAD children. | ||
| 195 | * Make sure they all go away, see __unhash_process(). | ||
| 196 | */ | ||
| 197 | for (;;) { | ||
| 198 | bool need_wait = false; | ||
| 199 | |||
| 200 | read_lock(&tasklist_lock); | ||
| 201 | if (!list_empty(¤t->children)) { | ||
| 202 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 203 | need_wait = true; | ||
| 204 | } | ||
| 205 | read_unlock(&tasklist_lock); | ||
| 206 | |||
| 207 | if (!need_wait) | ||
| 208 | break; | ||
| 209 | schedule(); | ||
| 210 | } | ||
| 211 | |||
| 192 | if (pid_ns->reboot) | 212 | if (pid_ns->reboot) |
| 193 | current->signal->group_exit_code = pid_ns->reboot; | 213 | current->signal->group_exit_code = pid_ns->reboot; |
| 194 | 214 | ||
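The loop added to zap_pid_ns_processes() pairs with the wake_up_process() call added to __unhash_process() in kernel/exit.c above: the namespace reaper sleeps until its ->children list is empty, and the last child to be unhashed wakes it. A rough userspace analogue of that handshake, using a condition variable in place of set_current_state()/schedule(); all names are illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ns_empty = PTHREAD_COND_INITIALIZER;
static int nr_children = 2;

/* waker side, analogous to the new code in __unhash_process() */
static void child_gone(void)
{
	pthread_mutex_lock(&ns_lock);
	if (--nr_children == 0)
		pthread_cond_signal(&ns_empty);	/* last child: wake the reaper */
	pthread_mutex_unlock(&ns_lock);
}

/* waiter side, analogous to the new loop in zap_pid_ns_processes() */
static void wait_for_children(void)
{
	pthread_mutex_lock(&ns_lock);
	while (nr_children != 0)
		pthread_cond_wait(&ns_empty, &ns_lock);
	pthread_mutex_unlock(&ns_lock);
	printf("all children reaped\n");
}

int main(void)
{
	child_gone();
	child_gone();
	wait_for_children();
	return 0;
}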
diff --git a/kernel/printk.c b/kernel/printk.c
index 32462d2b364a..5ae6b09e3805 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
| @@ -193,12 +193,19 @@ static int console_may_schedule; | |||
| 193 | * separated by ',', and find the message after the ';' character. | 193 | * separated by ',', and find the message after the ';' character. |
| 194 | */ | 194 | */ |
| 195 | 195 | ||
| 196 | enum log_flags { | ||
| 197 | LOG_DEFAULT = 0, | ||
| 198 | LOG_NOCONS = 1, /* already flushed, do not print to console */ | ||
| 199 | }; | ||
| 200 | |||
| 196 | struct log { | 201 | struct log { |
| 197 | u64 ts_nsec; /* timestamp in nanoseconds */ | 202 | u64 ts_nsec; /* timestamp in nanoseconds */ |
| 198 | u16 len; /* length of entire record */ | 203 | u16 len; /* length of entire record */ |
| 199 | u16 text_len; /* length of text buffer */ | 204 | u16 text_len; /* length of text buffer */ |
| 200 | u16 dict_len; /* length of dictionary buffer */ | 205 | u16 dict_len; /* length of dictionary buffer */ |
| 201 | u16 level; /* syslog level + facility */ | 206 | u8 facility; /* syslog facility */ |
| 207 | u8 flags:5; /* internal record flags */ | ||
| 208 | u8 level:3; /* syslog level */ | ||
| 202 | }; | 209 | }; |
| 203 | 210 | ||
| 204 | /* | 211 | /* |
| @@ -227,10 +234,10 @@ static u32 clear_idx; | |||
| 227 | #define LOG_LINE_MAX 1024 | 234 | #define LOG_LINE_MAX 1024 |
| 228 | 235 | ||
| 229 | /* record buffer */ | 236 | /* record buffer */ |
| 230 | #if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | 237 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
| 231 | #define LOG_ALIGN 4 | 238 | #define LOG_ALIGN 4 |
| 232 | #else | 239 | #else |
| 233 | #define LOG_ALIGN 8 | 240 | #define LOG_ALIGN __alignof__(struct log) |
| 234 | #endif | 241 | #endif |
| 235 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) | 242 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
| 236 | static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); | 243 | static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); |
| @@ -286,6 +293,7 @@ static u32 log_next(u32 idx) | |||
| 286 | 293 | ||
| 287 | /* insert record into the buffer, discard old ones, update heads */ | 294 | /* insert record into the buffer, discard old ones, update heads */ |
| 288 | static void log_store(int facility, int level, | 295 | static void log_store(int facility, int level, |
| 296 | enum log_flags flags, u64 ts_nsec, | ||
| 289 | const char *dict, u16 dict_len, | 297 | const char *dict, u16 dict_len, |
| 290 | const char *text, u16 text_len) | 298 | const char *text, u16 text_len) |
| 291 | { | 299 | { |
| @@ -329,8 +337,13 @@ static void log_store(int facility, int level, | |||
| 329 | msg->text_len = text_len; | 337 | msg->text_len = text_len; |
| 330 | memcpy(log_dict(msg), dict, dict_len); | 338 | memcpy(log_dict(msg), dict, dict_len); |
| 331 | msg->dict_len = dict_len; | 339 | msg->dict_len = dict_len; |
| 332 | msg->level = (facility << 3) | (level & 7); | 340 | msg->facility = facility; |
| 333 | msg->ts_nsec = local_clock(); | 341 | msg->level = level & 7; |
| 342 | msg->flags = flags & 0x1f; | ||
| 343 | if (ts_nsec > 0) | ||
| 344 | msg->ts_nsec = ts_nsec; | ||
| 345 | else | ||
| 346 | msg->ts_nsec = local_clock(); | ||
| 334 | memset(log_dict(msg) + dict_len, 0, pad_len); | 347 | memset(log_dict(msg) + dict_len, 0, pad_len); |
| 335 | msg->len = sizeof(struct log) + text_len + dict_len + pad_len; | 348 | msg->len = sizeof(struct log) + text_len + dict_len + pad_len; |
| 336 | 349 | ||
| @@ -414,7 +427,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, | |||
| 414 | if (!user) | 427 | if (!user) |
| 415 | return -EBADF; | 428 | return -EBADF; |
| 416 | 429 | ||
| 417 | mutex_lock(&user->lock); | 430 | ret = mutex_lock_interruptible(&user->lock); |
| 431 | if (ret) | ||
| 432 | return ret; | ||
| 418 | raw_spin_lock(&logbuf_lock); | 433 | raw_spin_lock(&logbuf_lock); |
| 419 | while (user->seq == log_next_seq) { | 434 | while (user->seq == log_next_seq) { |
| 420 | if (file->f_flags & O_NONBLOCK) { | 435 | if (file->f_flags & O_NONBLOCK) { |
| @@ -444,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, | |||
| 444 | ts_usec = msg->ts_nsec; | 459 | ts_usec = msg->ts_nsec; |
| 445 | do_div(ts_usec, 1000); | 460 | do_div(ts_usec, 1000); |
| 446 | len = sprintf(user->buf, "%u,%llu,%llu;", | 461 | len = sprintf(user->buf, "%u,%llu,%llu;", |
| 447 | msg->level, user->seq, ts_usec); | 462 | (msg->facility << 3) | msg->level, user->seq, ts_usec); |
| 448 | 463 | ||
| 449 | /* escape non-printable characters */ | 464 | /* escape non-printable characters */ |
| 450 | for (i = 0; i < msg->text_len; i++) { | 465 | for (i = 0; i < msg->text_len; i++) { |
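struct log now keeps the syslog facility and severity in separate fields; the combined value only exists on output, where devkmsg_read() above rebuilds it as (facility << 3) | level. A small standalone illustration of that encoding, with arbitrarily chosen values:

#include <stdio.h>

int main(void)
{
	unsigned int facility = 1;		/* e.g. "user" */
	unsigned int level = 6;			/* e.g. "info" */
	unsigned int pri = (facility << 3) | (level & 7);

	/* the low 3 bits carry the level, the remaining bits the facility */
	printf("pri=%u -> facility=%u level=%u\n", pri, pri >> 3, pri & 7);
	return 0;
}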
| @@ -785,6 +800,21 @@ static bool printk_time; | |||
| 785 | #endif | 800 | #endif |
| 786 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); | 801 | module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); |
| 787 | 802 | ||
| 803 | static size_t print_time(u64 ts, char *buf) | ||
| 804 | { | ||
| 805 | unsigned long rem_nsec; | ||
| 806 | |||
| 807 | if (!printk_time) | ||
| 808 | return 0; | ||
| 809 | |||
| 810 | if (!buf) | ||
| 811 | return 15; | ||
| 812 | |||
| 813 | rem_nsec = do_div(ts, 1000000000); | ||
| 814 | return sprintf(buf, "[%5lu.%06lu] ", | ||
| 815 | (unsigned long)ts, rem_nsec / 1000); | ||
| 816 | } | ||
| 817 | |||
| 788 | static size_t print_prefix(const struct log *msg, bool syslog, char *buf) | 818 | static size_t print_prefix(const struct log *msg, bool syslog, char *buf) |
| 789 | { | 819 | { |
| 790 | size_t len = 0; | 820 | size_t len = 0; |
| @@ -801,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf) | |||
| 801 | } | 831 | } |
| 802 | } | 832 | } |
| 803 | 833 | ||
| 804 | if (printk_time) { | 834 | len += print_time(msg->ts_nsec, buf ? buf + len : NULL); |
| 805 | if (buf) { | ||
| 806 | unsigned long long ts = msg->ts_nsec; | ||
| 807 | unsigned long rem_nsec = do_div(ts, 1000000000); | ||
| 808 | |||
| 809 | len += sprintf(buf + len, "[%5lu.%06lu] ", | ||
| 810 | (unsigned long) ts, rem_nsec / 1000); | ||
| 811 | } else { | ||
| 812 | len += 15; | ||
| 813 | } | ||
| 814 | } | ||
| 815 | |||
| 816 | return len; | 835 | return len; |
| 817 | } | 836 | } |
| 818 | 837 | ||
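print_time() factors the timestamp prefix out of print_prefix() so the continuation-line code further down can reuse it: with a NULL buffer it only reports the prefix width for sizing passes, otherwise it splits the nanosecond clock into seconds and microseconds. A userspace sketch of that split (the printk_time module-parameter check is dropped here):

#include <stdint.h>
#include <stdio.h>

static size_t sketch_print_time(uint64_t ts_nsec, char *buf)
{
	unsigned long rem_nsec = ts_nsec % 1000000000ULL;	/* do_div() analogue */
	uint64_t sec = ts_nsec / 1000000000ULL;

	if (!buf)
		return 15;	/* worst-case prefix width, for sizing passes */

	return sprintf(buf, "[%5lu.%06lu] ",
		       (unsigned long)sec, rem_nsec / 1000);
}

int main(void)
{
	char buf[32];

	sketch_print_time(12345678901ULL, buf);
	printf("need %zu bytes, got \"%s\"\n", sketch_print_time(0, NULL), buf);
	return 0;
}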
| @@ -860,26 +879,49 @@ static int syslog_print(char __user *buf, int size) | |||
| 860 | { | 879 | { |
| 861 | char *text; | 880 | char *text; |
| 862 | struct log *msg; | 881 | struct log *msg; |
| 863 | int len; | 882 | int len = 0; |
| 864 | 883 | ||
| 865 | text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); | 884 | text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); |
| 866 | if (!text) | 885 | if (!text) |
| 867 | return -ENOMEM; | 886 | return -ENOMEM; |
| 868 | 887 | ||
| 869 | raw_spin_lock_irq(&logbuf_lock); | 888 | while (size > 0) { |
| 870 | if (syslog_seq < log_first_seq) { | 889 | size_t n; |
| 871 | /* messages are gone, move to first one */ | 890 | |
| 872 | syslog_seq = log_first_seq; | 891 | raw_spin_lock_irq(&logbuf_lock); |
| 873 | syslog_idx = log_first_idx; | 892 | if (syslog_seq < log_first_seq) { |
| 874 | } | 893 | /* messages are gone, move to first one */ |
| 875 | msg = log_from_idx(syslog_idx); | 894 | syslog_seq = log_first_seq; |
| 876 | len = msg_print_text(msg, true, text, LOG_LINE_MAX); | 895 | syslog_idx = log_first_idx; |
| 877 | syslog_idx = log_next(syslog_idx); | 896 | } |
| 878 | syslog_seq++; | 897 | if (syslog_seq == log_next_seq) { |
| 879 | raw_spin_unlock_irq(&logbuf_lock); | 898 | raw_spin_unlock_irq(&logbuf_lock); |
| 899 | break; | ||
| 900 | } | ||
| 901 | msg = log_from_idx(syslog_idx); | ||
| 902 | n = msg_print_text(msg, true, text, LOG_LINE_MAX); | ||
| 903 | if (n <= size) { | ||
| 904 | syslog_idx = log_next(syslog_idx); | ||
| 905 | syslog_seq++; | ||
| 906 | } else | ||
| 907 | n = 0; | ||
| 908 | raw_spin_unlock_irq(&logbuf_lock); | ||
| 909 | |||
| 910 | if (!n) | ||
| 911 | break; | ||
| 912 | |||
| 913 | len += n; | ||
| 914 | size -= n; | ||
| 915 | buf += n; | ||
| 916 | n = copy_to_user(buf - n, text, n); | ||
| 880 | 917 | ||
| 881 | if (len > 0 && copy_to_user(buf, text, len)) | 918 | if (n) { |
| 882 | len = -EFAULT; | 919 | len -= n; |
| 920 | if (!len) | ||
| 921 | len = -EFAULT; | ||
| 922 | break; | ||
| 923 | } | ||
| 924 | } | ||
| 883 | 925 | ||
| 884 | kfree(text); | 926 | kfree(text); |
| 885 | return len; | 927 | return len; |
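syslog_print() now drains one record per loop iteration and stops as soon as the next record no longer fits, instead of copying a single record regardless of the caller's buffer size. The shape of that loop, reduced to a userspace sketch with a made-up record array and without the locking or copy_to_user() handling:

#include <stdio.h>
#include <string.h>

static const char *records[] = { "first line\n", "second line\n", "third\n" };

static size_t drain(char *buf, size_t size)
{
	size_t len = 0;

	for (size_t i = 0; i < sizeof(records) / sizeof(records[0]); i++) {
		size_t n = strlen(records[i]);

		if (n > size - len)	/* next record would not fit: stop */
			break;
		memcpy(buf + len, records[i], n);
		len += n;
	}
	return len;
}

int main(void)
{
	char buf[20];
	size_t n = drain(buf, sizeof(buf));

	fwrite(buf, 1, n, stdout);
	return 0;
}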
| @@ -909,7 +951,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 909 | /* | 951 | /* |
| 910 | * Find first record that fits, including all following records, | 952 | * Find first record that fits, including all following records, |
| 911 | * into the user-provided buffer for this dump. | 953 | * into the user-provided buffer for this dump. |
| 912 | */ | 954 | */ |
| 913 | seq = clear_seq; | 955 | seq = clear_seq; |
| 914 | idx = clear_idx; | 956 | idx = clear_idx; |
| 915 | while (seq < log_next_seq) { | 957 | while (seq < log_next_seq) { |
| @@ -919,6 +961,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 919 | idx = log_next(idx); | 961 | idx = log_next(idx); |
| 920 | seq++; | 962 | seq++; |
| 921 | } | 963 | } |
| 964 | |||
| 965 | /* move first record forward until length fits into the buffer */ | ||
| 922 | seq = clear_seq; | 966 | seq = clear_seq; |
| 923 | idx = clear_idx; | 967 | idx = clear_idx; |
| 924 | while (len > size && seq < log_next_seq) { | 968 | while (len > size && seq < log_next_seq) { |
| @@ -929,7 +973,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
| 929 | seq++; | 973 | seq++; |
| 930 | } | 974 | } |
| 931 | 975 | ||
| 932 | /* last message in this dump */ | 976 | /* last message fitting into this dump */ |
| 933 | next_seq = log_next_seq; | 977 | next_seq = log_next_seq; |
| 934 | 978 | ||
| 935 | len = 0; | 979 | len = 0; |
| @@ -974,6 +1018,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
| 974 | { | 1018 | { |
| 975 | bool clear = false; | 1019 | bool clear = false; |
| 976 | static int saved_console_loglevel = -1; | 1020 | static int saved_console_loglevel = -1; |
| 1021 | static DEFINE_MUTEX(syslog_mutex); | ||
| 977 | int error; | 1022 | int error; |
| 978 | 1023 | ||
| 979 | error = check_syslog_permissions(type, from_file); | 1024 | error = check_syslog_permissions(type, from_file); |
| @@ -1000,11 +1045,17 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
| 1000 | error = -EFAULT; | 1045 | error = -EFAULT; |
| 1001 | goto out; | 1046 | goto out; |
| 1002 | } | 1047 | } |
| 1048 | error = mutex_lock_interruptible(&syslog_mutex); | ||
| 1049 | if (error) | ||
| 1050 | goto out; | ||
| 1003 | error = wait_event_interruptible(log_wait, | 1051 | error = wait_event_interruptible(log_wait, |
| 1004 | syslog_seq != log_next_seq); | 1052 | syslog_seq != log_next_seq); |
| 1005 | if (error) | 1053 | if (error) { |
| 1054 | mutex_unlock(&syslog_mutex); | ||
| 1006 | goto out; | 1055 | goto out; |
| 1056 | } | ||
| 1007 | error = syslog_print(buf, len); | 1057 | error = syslog_print(buf, len); |
| 1058 | mutex_unlock(&syslog_mutex); | ||
| 1008 | break; | 1059 | break; |
| 1009 | /* Read/clear last kernel messages */ | 1060 | /* Read/clear last kernel messages */ |
| 1010 | case SYSLOG_ACTION_READ_CLEAR: | 1061 | case SYSLOG_ACTION_READ_CLEAR: |
| @@ -1027,6 +1078,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
| 1027 | /* Clear ring buffer */ | 1078 | /* Clear ring buffer */ |
| 1028 | case SYSLOG_ACTION_CLEAR: | 1079 | case SYSLOG_ACTION_CLEAR: |
| 1029 | syslog_print_all(NULL, 0, true); | 1080 | syslog_print_all(NULL, 0, true); |
| 1081 | break; | ||
| 1030 | /* Disable logging to console */ | 1082 | /* Disable logging to console */ |
| 1031 | case SYSLOG_ACTION_CONSOLE_OFF: | 1083 | case SYSLOG_ACTION_CONSOLE_OFF: |
| 1032 | if (saved_console_loglevel == -1) | 1084 | if (saved_console_loglevel == -1) |
| @@ -1259,15 +1311,92 @@ static inline void printk_delay(void) | |||
| 1259 | } | 1311 | } |
| 1260 | } | 1312 | } |
| 1261 | 1313 | ||
| 1314 | /* | ||
| 1315 | * Continuation lines are buffered, and not committed to the record buffer | ||
| 1316 | * until the line is complete, or a race forces it. The line fragments | ||
| 1317 | * though, are printed immediately to the consoles to ensure everything has | ||
| 1318 | * reached the console in case of a kernel crash. | ||
| 1319 | */ | ||
| 1320 | static struct cont { | ||
| 1321 | char buf[LOG_LINE_MAX]; | ||
| 1322 | size_t len; /* length == 0 means unused buffer */ | ||
| 1323 | size_t cons; /* bytes written to console */ | ||
| 1324 | struct task_struct *owner; /* task of first print*/ | ||
| 1325 | u64 ts_nsec; /* time of first print */ | ||
| 1326 | u8 level; /* log level of first message */ | ||
| 1327 | u8 facility; /* log level of first message */ | ||
| 1328 | bool flushed:1; /* buffer sealed and committed */ | ||
| 1329 | } cont; | ||
| 1330 | |||
| 1331 | static void cont_flush(void) | ||
| 1332 | { | ||
| 1333 | if (cont.flushed) | ||
| 1334 | return; | ||
| 1335 | if (cont.len == 0) | ||
| 1336 | return; | ||
| 1337 | |||
| 1338 | log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec, | ||
| 1339 | NULL, 0, cont.buf, cont.len); | ||
| 1340 | |||
| 1341 | cont.flushed = true; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | static bool cont_add(int facility, int level, const char *text, size_t len) | ||
| 1345 | { | ||
| 1346 | if (cont.len && cont.flushed) | ||
| 1347 | return false; | ||
| 1348 | |||
| 1349 | if (cont.len + len > sizeof(cont.buf)) { | ||
| 1350 | cont_flush(); | ||
| 1351 | return false; | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | if (!cont.len) { | ||
| 1355 | cont.facility = facility; | ||
| 1356 | cont.level = level; | ||
| 1357 | cont.owner = current; | ||
| 1358 | cont.ts_nsec = local_clock(); | ||
| 1359 | cont.cons = 0; | ||
| 1360 | cont.flushed = false; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | memcpy(cont.buf + cont.len, text, len); | ||
| 1364 | cont.len += len; | ||
| 1365 | return true; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | static size_t cont_print_text(char *text, size_t size) | ||
| 1369 | { | ||
| 1370 | size_t textlen = 0; | ||
| 1371 | size_t len; | ||
| 1372 | |||
| 1373 | if (cont.cons == 0) { | ||
| 1374 | textlen += print_time(cont.ts_nsec, text); | ||
| 1375 | size -= textlen; | ||
| 1376 | } | ||
| 1377 | |||
| 1378 | len = cont.len - cont.cons; | ||
| 1379 | if (len > 0) { | ||
| 1380 | if (len+1 > size) | ||
| 1381 | len = size-1; | ||
| 1382 | memcpy(text + textlen, cont.buf + cont.cons, len); | ||
| 1383 | textlen += len; | ||
| 1384 | cont.cons = cont.len; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | if (cont.flushed) { | ||
| 1388 | text[textlen++] = '\n'; | ||
| 1389 | /* got everything, release buffer */ | ||
| 1390 | cont.len = 0; | ||
| 1391 | } | ||
| 1392 | return textlen; | ||
| 1393 | } | ||
| 1394 | |||
| 1262 | asmlinkage int vprintk_emit(int facility, int level, | 1395 | asmlinkage int vprintk_emit(int facility, int level, |
| 1263 | const char *dict, size_t dictlen, | 1396 | const char *dict, size_t dictlen, |
| 1264 | const char *fmt, va_list args) | 1397 | const char *fmt, va_list args) |
| 1265 | { | 1398 | { |
| 1266 | static int recursion_bug; | 1399 | static int recursion_bug; |
| 1267 | static char cont_buf[LOG_LINE_MAX]; | ||
| 1268 | static size_t cont_len; | ||
| 1269 | static int cont_level; | ||
| 1270 | static struct task_struct *cont_task; | ||
| 1271 | static char textbuf[LOG_LINE_MAX]; | 1400 | static char textbuf[LOG_LINE_MAX]; |
| 1272 | char *text = textbuf; | 1401 | char *text = textbuf; |
| 1273 | size_t text_len; | 1402 | size_t text_len; |
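The cont buffer above replaces the ad-hoc cont_buf/cont_len statics that used to live inside vprintk_emit(): fragments printed without a trailing newline accumulate and are committed as one record once the line completes, or when a conflicting writer or an overflow forces an early flush. A much-simplified userspace sketch of that line assembly, where emit_record() stands in for log_store() and fragments longer than the buffer are not handled:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static struct {
	char buf[64];
	size_t len;
} cont;

static void emit_record(const char *text, size_t len)
{
	printf("record: %.*s\n", (int)len, text);
}

static void cont_flush(void)
{
	if (cont.len) {
		emit_record(cont.buf, cont.len);
		cont.len = 0;
	}
}

static void printk_fragment(const char *text)
{
	size_t len = strlen(text);
	bool newline = len && text[len - 1] == '\n';

	if (newline)
		len--;			/* the stored record holds text only */
	if (cont.len + len > sizeof(cont.buf))
		cont_flush();		/* would overflow: seal the buffer early */
	memcpy(cont.buf + cont.len, text, len);
	cont.len += len;
	if (newline)
		cont_flush();		/* complete line: commit as one record */
}

int main(void)
{
	printk_fragment("checking partitions: ");
	printk_fragment("sda1 ");
	printk_fragment("sda2\n");
	return 0;
}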
| @@ -1313,7 +1442,8 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1313 | recursion_bug = 0; | 1442 | recursion_bug = 0; |
| 1314 | printed_len += strlen(recursion_msg); | 1443 | printed_len += strlen(recursion_msg); |
| 1315 | /* emit KERN_CRIT message */ | 1444 | /* emit KERN_CRIT message */ |
| 1316 | log_store(0, 2, NULL, 0, recursion_msg, printed_len); | 1445 | log_store(0, 2, LOG_DEFAULT, 0, |
| 1446 | NULL, 0, recursion_msg, printed_len); | ||
| 1317 | } | 1447 | } |
| 1318 | 1448 | ||
| 1319 | /* | 1449 | /* |
| @@ -1351,55 +1481,37 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1351 | } | 1481 | } |
| 1352 | 1482 | ||
| 1353 | if (!newline) { | 1483 | if (!newline) { |
| 1354 | if (cont_len && (prefix || cont_task != current)) { | 1484 | /* |
| 1355 | /* | 1485 | * Flush the conflicting buffer. An earlier newline was missing, |
| 1356 | * Flush earlier buffer, which is either from a | 1486 | * or another task also prints continuation lines. |
| 1357 | * different thread, or when we got a new prefix. | 1487 | */ |
| 1358 | */ | 1488 | if (cont.len && (prefix || cont.owner != current)) |
| 1359 | log_store(facility, cont_level, NULL, 0, cont_buf, cont_len); | 1489 | cont_flush(); |
| 1360 | cont_len = 0; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | if (!cont_len) { | ||
| 1364 | cont_level = level; | ||
| 1365 | cont_task = current; | ||
| 1366 | } | ||
| 1367 | 1490 | ||
| 1368 | /* buffer or append to earlier buffer from the same thread */ | 1491 | /* buffer line if possible, otherwise store it right away */ |
| 1369 | if (cont_len + text_len > sizeof(cont_buf)) | 1492 | if (!cont_add(facility, level, text, text_len)) |
| 1370 | text_len = sizeof(cont_buf) - cont_len; | 1493 | log_store(facility, level, LOG_DEFAULT, 0, |
| 1371 | memcpy(cont_buf + cont_len, text, text_len); | 1494 | dict, dictlen, text, text_len); |
| 1372 | cont_len += text_len; | ||
| 1373 | } else { | 1495 | } else { |
| 1374 | if (cont_len && cont_task == current) { | 1496 | bool stored = false; |
| 1375 | if (prefix) { | ||
| 1376 | /* | ||
| 1377 | * New prefix from the same thread; flush. We | ||
| 1378 | * either got no earlier newline, or we race | ||
| 1379 | * with an interrupt. | ||
| 1380 | */ | ||
| 1381 | log_store(facility, cont_level, | ||
| 1382 | NULL, 0, cont_buf, cont_len); | ||
| 1383 | cont_len = 0; | ||
| 1384 | } | ||
| 1385 | 1497 | ||
| 1386 | /* append to the earlier buffer and flush */ | 1498 | /* |
| 1387 | if (cont_len + text_len > sizeof(cont_buf)) | 1499 | * If an earlier newline was missing and it was the same task, |
| 1388 | text_len = sizeof(cont_buf) - cont_len; | 1500 | * either merge it with the current buffer and flush, or if |
| 1389 | memcpy(cont_buf + cont_len, text, text_len); | 1501 | * there was a race with interrupts (prefix == true) then just |
| 1390 | cont_len += text_len; | 1502 | * flush it out and store this line separately. |
| 1391 | log_store(facility, cont_level, | 1503 | */ |
| 1392 | NULL, 0, cont_buf, cont_len); | 1504 | if (cont.len && cont.owner == current) { |
| 1393 | cont_len = 0; | 1505 | if (!prefix) |
| 1394 | cont_task = NULL; | 1506 | stored = cont_add(facility, level, text, text_len); |
| 1395 | printed_len = cont_len; | 1507 | cont_flush(); |
| 1396 | } else { | ||
| 1397 | /* ordinary single and terminated line */ | ||
| 1398 | log_store(facility, level, | ||
| 1399 | dict, dictlen, text, text_len); | ||
| 1400 | printed_len = text_len; | ||
| 1401 | } | 1508 | } |
| 1509 | |||
| 1510 | if (!stored) | ||
| 1511 | log_store(facility, level, LOG_DEFAULT, 0, | ||
| 1512 | dict, dictlen, text, text_len); | ||
| 1402 | } | 1513 | } |
| 1514 | printed_len += text_len; | ||
| 1403 | 1515 | ||
| 1404 | /* | 1516 | /* |
| 1405 | * Try to acquire and then immediately release the console semaphore. | 1517 | * Try to acquire and then immediately release the console semaphore. |
| @@ -1486,11 +1598,18 @@ EXPORT_SYMBOL(printk); | |||
| 1486 | #else | 1598 | #else |
| 1487 | 1599 | ||
| 1488 | #define LOG_LINE_MAX 0 | 1600 | #define LOG_LINE_MAX 0 |
| 1601 | static struct cont { | ||
| 1602 | size_t len; | ||
| 1603 | size_t cons; | ||
| 1604 | u8 level; | ||
| 1605 | bool flushed:1; | ||
| 1606 | } cont; | ||
| 1489 | static struct log *log_from_idx(u32 idx) { return NULL; } | 1607 | static struct log *log_from_idx(u32 idx) { return NULL; } |
| 1490 | static u32 log_next(u32 idx) { return 0; } | 1608 | static u32 log_next(u32 idx) { return 0; } |
| 1491 | static void call_console_drivers(int level, const char *text, size_t len) {} | 1609 | static void call_console_drivers(int level, const char *text, size_t len) {} |
| 1492 | static size_t msg_print_text(const struct log *msg, bool syslog, | 1610 | static size_t msg_print_text(const struct log *msg, bool syslog, |
| 1493 | char *buf, size_t size) { return 0; } | 1611 | char *buf, size_t size) { return 0; } |
| 1612 | static size_t cont_print_text(char *text, size_t size) { return 0; } | ||
| 1494 | 1613 | ||
| 1495 | #endif /* CONFIG_PRINTK */ | 1614 | #endif /* CONFIG_PRINTK */ |
| 1496 | 1615 | ||
| @@ -1782,6 +1901,7 @@ static u32 console_idx; | |||
| 1782 | */ | 1901 | */ |
| 1783 | void console_unlock(void) | 1902 | void console_unlock(void) |
| 1784 | { | 1903 | { |
| 1904 | static char text[LOG_LINE_MAX]; | ||
| 1785 | static u64 seen_seq; | 1905 | static u64 seen_seq; |
| 1786 | unsigned long flags; | 1906 | unsigned long flags; |
| 1787 | bool wake_klogd = false; | 1907 | bool wake_klogd = false; |
| @@ -1794,10 +1914,23 @@ void console_unlock(void) | |||
| 1794 | 1914 | ||
| 1795 | console_may_schedule = 0; | 1915 | console_may_schedule = 0; |
| 1796 | 1916 | ||
| 1917 | /* flush buffered message fragment immediately to console */ | ||
| 1918 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
| 1919 | if (cont.len && (cont.cons < cont.len || cont.flushed)) { | ||
| 1920 | size_t len; | ||
| 1921 | |||
| 1922 | len = cont_print_text(text, sizeof(text)); | ||
| 1923 | raw_spin_unlock(&logbuf_lock); | ||
| 1924 | stop_critical_timings(); | ||
| 1925 | call_console_drivers(cont.level, text, len); | ||
| 1926 | start_critical_timings(); | ||
| 1927 | local_irq_restore(flags); | ||
| 1928 | } else | ||
| 1929 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 1930 | |||
| 1797 | again: | 1931 | again: |
| 1798 | for (;;) { | 1932 | for (;;) { |
| 1799 | struct log *msg; | 1933 | struct log *msg; |
| 1800 | static char text[LOG_LINE_MAX]; | ||
| 1801 | size_t len; | 1934 | size_t len; |
| 1802 | int level; | 1935 | int level; |
| 1803 | 1936 | ||
| @@ -1812,13 +1945,22 @@ again: | |||
| 1812 | console_seq = log_first_seq; | 1945 | console_seq = log_first_seq; |
| 1813 | console_idx = log_first_idx; | 1946 | console_idx = log_first_idx; |
| 1814 | } | 1947 | } |
| 1815 | 1948 | skip: | |
| 1816 | if (console_seq == log_next_seq) | 1949 | if (console_seq == log_next_seq) |
| 1817 | break; | 1950 | break; |
| 1818 | 1951 | ||
| 1819 | msg = log_from_idx(console_idx); | 1952 | msg = log_from_idx(console_idx); |
| 1820 | level = msg->level & 7; | 1953 | if (msg->flags & LOG_NOCONS) { |
| 1954 | /* | ||
| 1955 | * Skip record we have buffered and already printed | ||
| 1956 | * directly to the console when we received it. | ||
| 1957 | */ | ||
| 1958 | console_idx = log_next(console_idx); | ||
| 1959 | console_seq++; | ||
| 1960 | goto skip; | ||
| 1961 | } | ||
| 1821 | 1962 | ||
| 1963 | level = msg->level; | ||
| 1822 | len = msg_print_text(msg, false, text, sizeof(text)); | 1964 | len = msg_print_text(msg, false, text, sizeof(text)); |
| 1823 | 1965 | ||
| 1824 | console_idx = log_next(console_idx); | 1966 | console_idx = log_next(console_idx); |
| @@ -2300,48 +2442,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); | |||
| 2300 | * kmsg_dump - dump kernel log to kernel message dumpers. | 2442 | * kmsg_dump - dump kernel log to kernel message dumpers. |
| 2301 | * @reason: the reason (oops, panic etc) for dumping | 2443 | * @reason: the reason (oops, panic etc) for dumping |
| 2302 | * | 2444 | * |
| 2303 | * Iterate through each of the dump devices and call the oops/panic | 2445 | * Call each of the registered dumper's dump() callback, which can |
| 2304 | * callbacks with the log buffer. | 2446 | * retrieve the kmsg records with kmsg_dump_get_line() or |
| 2447 | * kmsg_dump_get_buffer(). | ||
| 2305 | */ | 2448 | */ |
| 2306 | void kmsg_dump(enum kmsg_dump_reason reason) | 2449 | void kmsg_dump(enum kmsg_dump_reason reason) |
| 2307 | { | 2450 | { |
| 2308 | u64 idx; | ||
| 2309 | struct kmsg_dumper *dumper; | 2451 | struct kmsg_dumper *dumper; |
| 2310 | const char *s1, *s2; | ||
| 2311 | unsigned long l1, l2; | ||
| 2312 | unsigned long flags; | 2452 | unsigned long flags; |
| 2313 | 2453 | ||
| 2314 | if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) | 2454 | if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) |
| 2315 | return; | 2455 | return; |
| 2316 | 2456 | ||
| 2317 | /* Theoretically, the log could move on after we do this, but | 2457 | rcu_read_lock(); |
| 2318 | there's not a lot we can do about that. The new messages | 2458 | list_for_each_entry_rcu(dumper, &dump_list, list) { |
| 2319 | will overwrite the start of what we dump. */ | 2459 | if (dumper->max_reason && reason > dumper->max_reason) |
| 2460 | continue; | ||
| 2461 | |||
| 2462 | /* initialize iterator with data about the stored records */ | ||
| 2463 | dumper->active = true; | ||
| 2464 | |||
| 2465 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
| 2466 | dumper->cur_seq = clear_seq; | ||
| 2467 | dumper->cur_idx = clear_idx; | ||
| 2468 | dumper->next_seq = log_next_seq; | ||
| 2469 | dumper->next_idx = log_next_idx; | ||
| 2470 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 2471 | |||
| 2472 | /* invoke dumper which will iterate over records */ | ||
| 2473 | dumper->dump(dumper, reason); | ||
| 2474 | |||
| 2475 | /* reset iterator */ | ||
| 2476 | dumper->active = false; | ||
| 2477 | } | ||
| 2478 | rcu_read_unlock(); | ||
| 2479 | } | ||
| 2480 | |||
| 2481 | /** | ||
| 2482 | * kmsg_dump_get_line - retrieve one kmsg log line | ||
| 2483 | * @dumper: registered kmsg dumper | ||
| 2484 | * @syslog: include the "<4>" prefixes | ||
| 2485 | * @line: buffer to copy the line to | ||
| 2486 | * @size: maximum size of the buffer | ||
| 2487 | * @len: length of line placed into buffer | ||
| 2488 | * | ||
| 2489 | * Start at the beginning of the kmsg buffer, with the oldest kmsg | ||
| 2490 | * record, and copy one record into the provided buffer. | ||
| 2491 | * | ||
| 2492 | * Consecutive calls will return the next available record moving | ||
| 2493 | * towards the end of the buffer with the youngest messages. | ||
| 2494 | * | ||
| 2495 | * A return value of FALSE indicates that there are no more records to | ||
| 2496 | * read. | ||
| 2497 | */ | ||
| 2498 | bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, | ||
| 2499 | char *line, size_t size, size_t *len) | ||
| 2500 | { | ||
| 2501 | unsigned long flags; | ||
| 2502 | struct log *msg; | ||
| 2503 | size_t l = 0; | ||
| 2504 | bool ret = false; | ||
| 2505 | |||
| 2506 | if (!dumper->active) | ||
| 2507 | goto out; | ||
| 2320 | 2508 | ||
| 2321 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2509 | raw_spin_lock_irqsave(&logbuf_lock, flags); |
| 2322 | if (syslog_seq < log_first_seq) | 2510 | if (dumper->cur_seq < log_first_seq) { |
| 2323 | idx = syslog_idx; | 2511 | /* messages are gone, move to first available one */ |
| 2324 | else | 2512 | dumper->cur_seq = log_first_seq; |
| 2325 | idx = log_first_idx; | 2513 | dumper->cur_idx = log_first_idx; |
| 2514 | } | ||
| 2326 | 2515 | ||
| 2327 | if (idx > log_next_idx) { | 2516 | /* last entry */ |
| 2328 | s1 = log_buf; | 2517 | if (dumper->cur_seq >= log_next_seq) { |
| 2329 | l1 = log_next_idx; | 2518 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
| 2519 | goto out; | ||
| 2520 | } | ||
| 2330 | 2521 | ||
| 2331 | s2 = log_buf + idx; | 2522 | msg = log_from_idx(dumper->cur_idx); |
| 2332 | l2 = log_buf_len - idx; | 2523 | l = msg_print_text(msg, syslog, |
| 2333 | } else { | 2524 | line, size); |
| 2334 | s1 = ""; | ||
| 2335 | l1 = 0; | ||
| 2336 | 2525 | ||
| 2337 | s2 = log_buf + idx; | 2526 | dumper->cur_idx = log_next(dumper->cur_idx); |
| 2338 | l2 = log_next_idx - idx; | 2527 | dumper->cur_seq++; |
| 2528 | ret = true; | ||
| 2529 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 2530 | out: | ||
| 2531 | if (len) | ||
| 2532 | *len = l; | ||
| 2533 | return ret; | ||
| 2534 | } | ||
| 2535 | EXPORT_SYMBOL_GPL(kmsg_dump_get_line); | ||
| 2536 | |||
| 2537 | /** | ||
| 2538 | * kmsg_dump_get_buffer - copy kmsg log lines | ||
| 2539 | * @dumper: registered kmsg dumper | ||
| 2540 | * @syslog: include the "<4>" prefixes | ||
| 2541 | * @line: buffer to copy the line to | ||
| 2542 | * @size: maximum size of the buffer | ||
| 2543 | * @len: length of line placed into buffer | ||
| 2544 | * | ||
| 2545 | * Start at the end of the kmsg buffer and fill the provided buffer | ||
| 2546 | * with as many of the the *youngest* kmsg records that fit into it. | ||
| 2547 | * If the buffer is large enough, all available kmsg records will be | ||
| 2548 | * copied with a single call. | ||
| 2549 | * | ||
| 2550 | * Consecutive calls will fill the buffer with the next block of | ||
| 2551 | * available older records, not including the earlier retrieved ones. | ||
| 2552 | * | ||
| 2553 | * A return value of FALSE indicates that there are no more records to | ||
| 2554 | * read. | ||
| 2555 | */ | ||
| 2556 | bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, | ||
| 2557 | char *buf, size_t size, size_t *len) | ||
| 2558 | { | ||
| 2559 | unsigned long flags; | ||
| 2560 | u64 seq; | ||
| 2561 | u32 idx; | ||
| 2562 | u64 next_seq; | ||
| 2563 | u32 next_idx; | ||
| 2564 | size_t l = 0; | ||
| 2565 | bool ret = false; | ||
| 2566 | |||
| 2567 | if (!dumper->active) | ||
| 2568 | goto out; | ||
| 2569 | |||
| 2570 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
| 2571 | if (dumper->cur_seq < log_first_seq) { | ||
| 2572 | /* messages are gone, move to first available one */ | ||
| 2573 | dumper->cur_seq = log_first_seq; | ||
| 2574 | dumper->cur_idx = log_first_idx; | ||
| 2339 | } | 2575 | } |
| 2576 | |||
| 2577 | /* last entry */ | ||
| 2578 | if (dumper->cur_seq >= dumper->next_seq) { | ||
| 2579 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 2580 | goto out; | ||
| 2581 | } | ||
| 2582 | |||
| 2583 | /* calculate length of entire buffer */ | ||
| 2584 | seq = dumper->cur_seq; | ||
| 2585 | idx = dumper->cur_idx; | ||
| 2586 | while (seq < dumper->next_seq) { | ||
| 2587 | struct log *msg = log_from_idx(idx); | ||
| 2588 | |||
| 2589 | l += msg_print_text(msg, true, NULL, 0); | ||
| 2590 | idx = log_next(idx); | ||
| 2591 | seq++; | ||
| 2592 | } | ||
| 2593 | |||
| 2594 | /* move first record forward until length fits into the buffer */ | ||
| 2595 | seq = dumper->cur_seq; | ||
| 2596 | idx = dumper->cur_idx; | ||
| 2597 | while (l > size && seq < dumper->next_seq) { | ||
| 2598 | struct log *msg = log_from_idx(idx); | ||
| 2599 | |||
| 2600 | l -= msg_print_text(msg, true, NULL, 0); | ||
| 2601 | idx = log_next(idx); | ||
| 2602 | seq++; | ||
| 2603 | } | ||
| 2604 | |||
| 2605 | /* last message in next interation */ | ||
| 2606 | next_seq = seq; | ||
| 2607 | next_idx = idx; | ||
| 2608 | |||
| 2609 | l = 0; | ||
| 2610 | while (seq < dumper->next_seq) { | ||
| 2611 | struct log *msg = log_from_idx(idx); | ||
| 2612 | |||
| 2613 | l += msg_print_text(msg, syslog, | ||
| 2614 | buf + l, size - l); | ||
| 2615 | |||
| 2616 | idx = log_next(idx); | ||
| 2617 | seq++; | ||
| 2618 | } | ||
| 2619 | |||
| 2620 | dumper->next_seq = next_seq; | ||
| 2621 | dumper->next_idx = next_idx; | ||
| 2622 | ret = true; | ||
| 2340 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | 2623 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); |
| 2624 | out: | ||
| 2625 | if (len) | ||
| 2626 | *len = l; | ||
| 2627 | return ret; | ||
| 2628 | } | ||
| 2629 | EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); | ||
| 2341 | 2630 | ||
| 2342 | rcu_read_lock(); | 2631 | /** |
| 2343 | list_for_each_entry_rcu(dumper, &dump_list, list) | 2632 | * kmsg_dump_rewind - reset the interator |
| 2344 | dumper->dump(dumper, reason, s1, l1, s2, l2); | 2633 | * @dumper: registered kmsg dumper |
| 2345 | rcu_read_unlock(); | 2634 | * |
| 2635 | * Reset the dumper's iterator so that kmsg_dump_get_line() and | ||
| 2636 | * kmsg_dump_get_buffer() can be called again and used multiple | ||
| 2637 | * times within the same dumper.dump() callback. | ||
| 2638 | */ | ||
| 2639 | void kmsg_dump_rewind(struct kmsg_dumper *dumper) | ||
| 2640 | { | ||
| 2641 | unsigned long flags; | ||
| 2642 | |||
| 2643 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
| 2644 | dumper->cur_seq = clear_seq; | ||
| 2645 | dumper->cur_idx = clear_idx; | ||
| 2646 | dumper->next_seq = log_next_seq; | ||
| 2647 | dumper->next_idx = log_next_idx; | ||
| 2648 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 2346 | } | 2649 | } |
| 2650 | EXPORT_SYMBOL_GPL(kmsg_dump_rewind); | ||
| 2347 | #endif | 2651 | #endif |
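With the old interface gone, a dumper no longer receives raw buffer pointers; its dump() callback pulls already-formatted records through the iterator that kmsg_dump() sets up. A hedged sketch of what a dumper might look like against the API added above; write_to_flash() is a hypothetical storage backend, and registration via kmsg_dump_register() is assumed from the pre-existing kmsg_dump code rather than shown in this patch:

#include <linux/kmsg_dump.h>

static void write_to_flash(const char *buf, size_t len);	/* hypothetical backend */

static void example_dump(struct kmsg_dumper *dumper,
			 enum kmsg_dump_reason reason)
{
	static char line[1024];
	size_t len;

	/* oldest record first; returns false once no records are left */
	while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len))
		write_to_flash(line, len);
}

static struct kmsg_dumper example_dumper = {
	.dump = example_dump,
	.max_reason = KMSG_DUMP_PANIC,	/* only dump on panic, per the new check */
};

/* at init time: kmsg_dump_register(&example_dumper); */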
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88d92d0..38ecdda3f55f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
| @@ -1397,6 +1397,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | |||
| 1397 | rdp->qlen_lazy += rsp->qlen_lazy; | 1397 | rdp->qlen_lazy += rsp->qlen_lazy; |
| 1398 | rdp->qlen += rsp->qlen; | 1398 | rdp->qlen += rsp->qlen; |
| 1399 | rdp->n_cbs_adopted += rsp->qlen; | 1399 | rdp->n_cbs_adopted += rsp->qlen; |
| 1400 | if (rsp->qlen_lazy != rsp->qlen) | ||
| 1401 | rcu_idle_count_callbacks_posted(); | ||
| 1400 | rsp->qlen_lazy = 0; | 1402 | rsp->qlen_lazy = 0; |
| 1401 | rsp->qlen = 0; | 1403 | rsp->qlen = 0; |
| 1402 | 1404 | ||
| @@ -1528,7 +1530,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1528 | { | 1530 | { |
| 1529 | unsigned long flags; | 1531 | unsigned long flags; |
| 1530 | struct rcu_head *next, *list, **tail; | 1532 | struct rcu_head *next, *list, **tail; |
| 1531 | int bl, count, count_lazy; | 1533 | int bl, count, count_lazy, i; |
| 1532 | 1534 | ||
| 1533 | /* If no callbacks are ready, just return.*/ | 1535 | /* If no callbacks are ready, just return.*/ |
| 1534 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1536 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
| @@ -1551,9 +1553,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1551 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1553 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
| 1552 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1554 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
| 1553 | tail = rdp->nxttail[RCU_DONE_TAIL]; | 1555 | tail = rdp->nxttail[RCU_DONE_TAIL]; |
| 1554 | for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) | 1556 | for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) |
| 1555 | if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) | 1557 | if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) |
| 1556 | rdp->nxttail[count] = &rdp->nxtlist; | 1558 | rdp->nxttail[i] = &rdp->nxtlist; |
| 1557 | local_irq_restore(flags); | 1559 | local_irq_restore(flags); |
| 1558 | 1560 | ||
| 1559 | /* Invoke callbacks. */ | 1561 | /* Invoke callbacks. */ |
| @@ -1581,9 +1583,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1581 | if (list != NULL) { | 1583 | if (list != NULL) { |
| 1582 | *tail = rdp->nxtlist; | 1584 | *tail = rdp->nxtlist; |
| 1583 | rdp->nxtlist = list; | 1585 | rdp->nxtlist = list; |
| 1584 | for (count = 0; count < RCU_NEXT_SIZE; count++) | 1586 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
| 1585 | if (&rdp->nxtlist == rdp->nxttail[count]) | 1587 | if (&rdp->nxtlist == rdp->nxttail[i]) |
| 1586 | rdp->nxttail[count] = tail; | 1588 | rdp->nxttail[i] = tail; |
| 1587 | else | 1589 | else |
| 1588 | break; | 1590 | break; |
| 1589 | } | 1591 | } |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7f5d138dedf5..ea056495783e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
| @@ -84,6 +84,20 @@ struct rcu_dynticks { | |||
| 84 | /* Process level is worth LLONG_MAX/2. */ | 84 | /* Process level is worth LLONG_MAX/2. */ |
| 85 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ | 85 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
| 86 | atomic_t dynticks; /* Even value for idle, else odd. */ | 86 | atomic_t dynticks; /* Even value for idle, else odd. */ |
| 87 | #ifdef CONFIG_RCU_FAST_NO_HZ | ||
| 88 | int dyntick_drain; /* Prepare-for-idle state variable. */ | ||
| 89 | unsigned long dyntick_holdoff; | ||
| 90 | /* No retries for the jiffy of failure. */ | ||
| 91 | struct timer_list idle_gp_timer; | ||
| 92 | /* Wake up CPU sleeping with callbacks. */ | ||
| 93 | unsigned long idle_gp_timer_expires; | ||
| 94 | /* When to wake up CPU (for repost). */ | ||
| 95 | bool idle_first_pass; /* First pass of attempt to go idle? */ | ||
| 96 | unsigned long nonlazy_posted; | ||
| 97 | /* # times non-lazy CBs posted to CPU. */ | ||
| 98 | unsigned long nonlazy_posted_snap; | ||
| 99 | /* idle-period nonlazy_posted snapshot. */ | ||
| 100 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | ||
| 87 | }; | 101 | }; |
| 88 | 102 | ||
| 89 | /* RCU's kthread states for tracing. */ | 103 | /* RCU's kthread states for tracing. */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 2411000d9869..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
| @@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) | |||
| 1886 | * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs | 1886 | * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs |
| 1887 | * any flavor of RCU. | 1887 | * any flavor of RCU. |
| 1888 | */ | 1888 | */ |
| 1889 | int rcu_needs_cpu(int cpu) | 1889 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) |
| 1890 | { | 1890 | { |
| 1891 | *delta_jiffies = ULONG_MAX; | ||
| 1891 | return rcu_cpu_has_callbacks(cpu); | 1892 | return rcu_cpu_has_callbacks(cpu); |
| 1892 | } | 1893 | } |
| 1893 | 1894 | ||
| @@ -1962,41 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void) | |||
| 1962 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | 1963 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ |
| 1963 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | 1964 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
| 1964 | 1965 | ||
| 1965 | /* Loop counter for rcu_prepare_for_idle(). */ | ||
| 1966 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | ||
| 1967 | /* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */ | ||
| 1968 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | ||
| 1969 | /* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */ | ||
| 1970 | static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer); | ||
| 1971 | /* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */ | ||
| 1972 | static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires); | ||
| 1973 | /* Enable special processing on first attempt to enter dyntick-idle mode. */ | ||
| 1974 | static DEFINE_PER_CPU(bool, rcu_idle_first_pass); | ||
| 1975 | /* Running count of non-lazy callbacks posted, never decremented. */ | ||
| 1976 | static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted); | ||
| 1977 | /* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */ | ||
| 1978 | static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap); | ||
| 1979 | |||
| 1980 | /* | ||
| 1981 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | ||
| 1982 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | ||
| 1983 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | ||
| 1984 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | ||
| 1985 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
| 1986 | * it is better to incur scheduling-clock interrupts than to spin | ||
| 1987 | * continuously for the same time duration! | ||
| 1988 | */ | ||
| 1989 | int rcu_needs_cpu(int cpu) | ||
| 1990 | { | ||
| 1991 | /* Flag a new idle sojourn to the idle-entry state machine. */ | ||
| 1992 | per_cpu(rcu_idle_first_pass, cpu) = 1; | ||
| 1993 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
| 1994 | if (!rcu_cpu_has_callbacks(cpu)) | ||
| 1995 | return 0; | ||
| 1996 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ | ||
| 1997 | return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; | ||
| 1998 | } | ||
| 1999 | |||
| 2000 | /* | 1966 | /* |
| 2001 | * Does the specified flavor of RCU have non-lazy callbacks pending on | 1967 | * Does the specified flavor of RCU have non-lazy callbacks pending on |
| 2002 | * the specified CPU? Both RCU flavor and CPU are specified by the | 1968 | * the specified CPU? Both RCU flavor and CPU are specified by the |
| @@ -2040,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | |||
| 2040 | } | 2006 | } |
| 2041 | 2007 | ||
| 2042 | /* | 2008 | /* |
| 2009 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | ||
| 2010 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | ||
| 2011 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | ||
| 2012 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | ||
| 2013 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
| 2014 | * it is better to incur scheduling-clock interrupts than to spin | ||
| 2015 | * continuously for the same time duration! | ||
| 2016 | * | ||
| 2017 | * The delta_jiffies argument is used to store the time when RCU is | ||
| 2018 | * going to need the CPU again if it still has callbacks. The reason | ||
| 2019 | * for this is that rcu_prepare_for_idle() might need to post a timer, | ||
| 2020 | * but if so, it will do so after tick_nohz_stop_sched_tick() has set | ||
| 2021 | * the wakeup time for this CPU. This means that RCU's timer can be | ||
| 2022 | * delayed until the wakeup time, which defeats the purpose of posting | ||
| 2023 | * a timer. | ||
| 2024 | */ | ||
| 2025 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | ||
| 2026 | { | ||
| 2027 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
| 2028 | |||
| 2029 | /* Flag a new idle sojourn to the idle-entry state machine. */ | ||
| 2030 | rdtp->idle_first_pass = 1; | ||
| 2031 | /* If no callbacks, RCU doesn't need the CPU. */ | ||
| 2032 | if (!rcu_cpu_has_callbacks(cpu)) { | ||
| 2033 | *delta_jiffies = ULONG_MAX; | ||
| 2034 | return 0; | ||
| 2035 | } | ||
| 2036 | if (rdtp->dyntick_holdoff == jiffies) { | ||
| 2037 | /* RCU recently tried and failed, so don't try again. */ | ||
| 2038 | *delta_jiffies = 1; | ||
| 2039 | return 1; | ||
| 2040 | } | ||
| 2041 | /* Set up for the possibility that RCU will post a timer. */ | ||
| 2042 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) | ||
| 2043 | *delta_jiffies = RCU_IDLE_GP_DELAY; | ||
| 2044 | else | ||
| 2045 | *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; | ||
| 2046 | return 0; | ||
| 2047 | } | ||
| 2048 | |||
| 2049 | /* | ||
| 2043 | * Handler for smp_call_function_single(). The only point of this | 2050 | * Handler for smp_call_function_single(). The only point of this |
| 2044 | * handler is to wake the CPU up, so the handler does only tracing. | 2051 | * handler is to wake the CPU up, so the handler does only tracing. |
| 2045 | */ | 2052 | */ |
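The comment above explains why rcu_needs_cpu() now reports, through *delta_jiffies, when RCU will next need this CPU: the nohz code has to pick its wakeup time before rcu_prepare_for_idle() can post a timer. A hedged sketch of how a caller can fold that value into its own sleep decision; this is illustrative only and not the actual tick-sched code, which is changed separately in kernel/time/tick-sched.c:

/* Illustrative helper only: choose how long the CPU may sleep, bounded by
 * both the next timer event and RCU's reported deadline. */
static unsigned long idle_sleep_jiffies(int cpu, unsigned long next_timer)
{
	unsigned long rcu_delta;

	if (rcu_needs_cpu(cpu, &rcu_delta))
		return 0;		/* RCU needs the tick; do not stop it */

	return next_timer < rcu_delta ? next_timer : rcu_delta;
}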
| @@ -2075,21 +2082,24 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in) | |||
| 2075 | */ | 2082 | */ |
| 2076 | static void rcu_prepare_for_idle_init(int cpu) | 2083 | static void rcu_prepare_for_idle_init(int cpu) |
| 2077 | { | 2084 | { |
| 2078 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2085 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
| 2079 | setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), | 2086 | |
| 2080 | rcu_idle_gp_timer_func, cpu); | 2087 | rdtp->dyntick_holdoff = jiffies - 1; |
| 2081 | per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; | 2088 | setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); |
| 2082 | per_cpu(rcu_idle_first_pass, cpu) = 1; | 2089 | rdtp->idle_gp_timer_expires = jiffies - 1; |
| 2090 | rdtp->idle_first_pass = 1; | ||
| 2083 | } | 2091 | } |
| 2084 | 2092 | ||
| 2085 | /* | 2093 | /* |
| 2086 | * Clean up for exit from idle. Because we are exiting from idle, there | 2094 | * Clean up for exit from idle. Because we are exiting from idle, there |
| 2087 | * is no longer any point to rcu_idle_gp_timer, so cancel it. This will | 2095 | * is no longer any point to ->idle_gp_timer, so cancel it. This will |
| 2088 | * do nothing if this timer is not active, so just cancel it unconditionally. | 2096 | * do nothing if this timer is not active, so just cancel it unconditionally. |
| 2089 | */ | 2097 | */ |
| 2090 | static void rcu_cleanup_after_idle(int cpu) | 2098 | static void rcu_cleanup_after_idle(int cpu) |
| 2091 | { | 2099 | { |
| 2092 | del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); | 2100 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
| 2101 | |||
| 2102 | del_timer(&rdtp->idle_gp_timer); | ||
| 2093 | trace_rcu_prep_idle("Cleanup after idle"); | 2103 | trace_rcu_prep_idle("Cleanup after idle"); |
| 2094 | } | 2104 | } |
| 2095 | 2105 | ||
| @@ -2108,42 +2118,41 @@ static void rcu_cleanup_after_idle(int cpu) | |||
| 2108 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 2118 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
| 2109 | * disabled, we do one pass of force_quiescent_state(), then do a | 2119 | * disabled, we do one pass of force_quiescent_state(), then do a |
| 2110 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | 2120 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
| 2111 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 2121 | * later. The ->dyntick_drain field controls the sequencing. |
| 2112 | * | 2122 | * |
| 2113 | * The caller must have disabled interrupts. | 2123 | * The caller must have disabled interrupts. |
| 2114 | */ | 2124 | */ |
| 2115 | static void rcu_prepare_for_idle(int cpu) | 2125 | static void rcu_prepare_for_idle(int cpu) |
| 2116 | { | 2126 | { |
| 2117 | struct timer_list *tp; | 2127 | struct timer_list *tp; |
| 2128 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
| 2118 | 2129 | ||
| 2119 | /* | 2130 | /* |
| 2120 | * If this is an idle re-entry, for example, due to use of | 2131 | * If this is an idle re-entry, for example, due to use of |
| 2121 | * RCU_NONIDLE() or the new idle-loop tracing API within the idle | 2132 | * RCU_NONIDLE() or the new idle-loop tracing API within the idle |
| 2122 | * loop, then don't take any state-machine actions, unless the | 2133 | * loop, then don't take any state-machine actions, unless the |
| 2123 | * momentary exit from idle queued additional non-lazy callbacks. | 2134 | * momentary exit from idle queued additional non-lazy callbacks. |
| 2124 | * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks | 2135 | * Instead, repost the ->idle_gp_timer if this CPU has callbacks |
| 2125 | * pending. | 2136 | * pending. |
| 2126 | */ | 2137 | */ |
| 2127 | if (!per_cpu(rcu_idle_first_pass, cpu) && | 2138 | if (!rdtp->idle_first_pass && |
| 2128 | (per_cpu(rcu_nonlazy_posted, cpu) == | 2139 | (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { |
| 2129 | per_cpu(rcu_nonlazy_posted_snap, cpu))) { | ||
| 2130 | if (rcu_cpu_has_callbacks(cpu)) { | 2140 | if (rcu_cpu_has_callbacks(cpu)) { |
| 2131 | tp = &per_cpu(rcu_idle_gp_timer, cpu); | 2141 | tp = &rdtp->idle_gp_timer; |
| 2132 | mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); | 2142 | mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); |
| 2133 | } | 2143 | } |
| 2134 | return; | 2144 | return; |
| 2135 | } | 2145 | } |
| 2136 | per_cpu(rcu_idle_first_pass, cpu) = 0; | 2146 | rdtp->idle_first_pass = 0; |
| 2137 | per_cpu(rcu_nonlazy_posted_snap, cpu) = | 2147 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; |
| 2138 | per_cpu(rcu_nonlazy_posted, cpu) - 1; | ||
| 2139 | 2148 | ||
| 2140 | /* | 2149 | /* |
| 2141 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 2150 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
| 2142 | * Also reset state to avoid prejudicing later attempts. | 2151 | * Also reset state to avoid prejudicing later attempts. |
| 2143 | */ | 2152 | */ |
| 2144 | if (!rcu_cpu_has_callbacks(cpu)) { | 2153 | if (!rcu_cpu_has_callbacks(cpu)) { |
| 2145 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2154 | rdtp->dyntick_holdoff = jiffies - 1; |
| 2146 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2155 | rdtp->dyntick_drain = 0; |
| 2147 | trace_rcu_prep_idle("No callbacks"); | 2156 | trace_rcu_prep_idle("No callbacks"); |
| 2148 | return; | 2157 | return; |
| 2149 | } | 2158 | } |
| @@ -2152,36 +2161,37 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2152 | * If in holdoff mode, just return. We will presumably have | 2161 | * If in holdoff mode, just return. We will presumably have |
| 2153 | * refrained from disabling the scheduling-clock tick. | 2162 | * refrained from disabling the scheduling-clock tick. |
| 2154 | */ | 2163 | */ |
| 2155 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | 2164 | if (rdtp->dyntick_holdoff == jiffies) { |
| 2156 | trace_rcu_prep_idle("In holdoff"); | 2165 | trace_rcu_prep_idle("In holdoff"); |
| 2157 | return; | 2166 | return; |
| 2158 | } | 2167 | } |
| 2159 | 2168 | ||
| 2160 | /* Check and update the rcu_dyntick_drain sequencing. */ | 2169 | /* Check and update the ->dyntick_drain sequencing. */ |
| 2161 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2170 | if (rdtp->dyntick_drain <= 0) { |
| 2162 | /* First time through, initialize the counter. */ | 2171 | /* First time through, initialize the counter. */ |
| 2163 | per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; | 2172 | rdtp->dyntick_drain = RCU_IDLE_FLUSHES; |
| 2164 | } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && | 2173 | } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && |
| 2165 | !rcu_pending(cpu) && | 2174 | !rcu_pending(cpu) && |
| 2166 | !local_softirq_pending()) { | 2175 | !local_softirq_pending()) { |
| 2167 | /* Can we go dyntick-idle despite still having callbacks? */ | 2176 | /* Can we go dyntick-idle despite still having callbacks? */ |
| 2168 | trace_rcu_prep_idle("Dyntick with callbacks"); | 2177 | rdtp->dyntick_drain = 0; |
| 2169 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2178 | rdtp->dyntick_holdoff = jiffies; |
| 2170 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2179 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { |
| 2171 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) | 2180 | trace_rcu_prep_idle("Dyntick with callbacks"); |
| 2172 | per_cpu(rcu_idle_gp_timer_expires, cpu) = | 2181 | rdtp->idle_gp_timer_expires = |
| 2173 | jiffies + RCU_IDLE_GP_DELAY; | 2182 | jiffies + RCU_IDLE_GP_DELAY; |
| 2174 | else | 2183 | } else { |
| 2175 | per_cpu(rcu_idle_gp_timer_expires, cpu) = | 2184 | rdtp->idle_gp_timer_expires = |
| 2176 | jiffies + RCU_IDLE_LAZY_GP_DELAY; | 2185 | jiffies + RCU_IDLE_LAZY_GP_DELAY; |
| 2177 | tp = &per_cpu(rcu_idle_gp_timer, cpu); | 2186 | trace_rcu_prep_idle("Dyntick with lazy callbacks"); |
| 2178 | mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); | 2187 | } |
| 2179 | per_cpu(rcu_nonlazy_posted_snap, cpu) = | 2188 | tp = &rdtp->idle_gp_timer; |
| 2180 | per_cpu(rcu_nonlazy_posted, cpu); | 2189 | mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); |
| 2190 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; | ||
| 2181 | return; /* Nothing more to do immediately. */ | 2191 | return; /* Nothing more to do immediately. */ |
| 2182 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2192 | } else if (--(rdtp->dyntick_drain) <= 0) { |
| 2183 | /* We have hit the limit, so time to give up. */ | 2193 | /* We have hit the limit, so time to give up. */ |
| 2184 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2194 | rdtp->dyntick_holdoff = jiffies; |
| 2185 | trace_rcu_prep_idle("Begin holdoff"); | 2195 | trace_rcu_prep_idle("Begin holdoff"); |
| 2186 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | 2196 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ |
| 2187 | return; | 2197 | return; |
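The ->dyntick_drain sequencing in the two hunks above condenses to the following sketch (illustrative only; tracepoints and the force_quiescent_state()/callback-advancing tail of the real function are omitted):

    if (rdtp->dyntick_drain <= 0) {
            /* First pass of a new idle period: start a fresh drain. */
            rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
    } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
               !rcu_pending(cpu) && !local_softirq_pending()) {
            /* Go dyntick-idle despite queued callbacks: arm the wakeup timer,
             * sooner if any of the callbacks are non-lazy. */
            rdtp->dyntick_drain = 0;
            rdtp->dyntick_holdoff = jiffies;
            rdtp->idle_gp_timer_expires = jiffies +
                    (rcu_cpu_has_nonlazy_callbacks(cpu) ?
                     RCU_IDLE_GP_DELAY : RCU_IDLE_LAZY_GP_DELAY);
            mod_timer_pinned(&rdtp->idle_gp_timer, rdtp->idle_gp_timer_expires);
            rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
            return;
    } else if (--rdtp->dyntick_drain <= 0) {
            /* Drain limit hit: hold off until the next jiffy. */
            rdtp->dyntick_holdoff = jiffies;
            invoke_rcu_core();
            return;
    }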
| @@ -2227,7 +2237,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
| 2227 | */ | 2237 | */ |
| 2228 | static void rcu_idle_count_callbacks_posted(void) | 2238 | static void rcu_idle_count_callbacks_posted(void) |
| 2229 | { | 2239 | { |
| 2230 | __this_cpu_add(rcu_nonlazy_posted, 1); | 2240 | __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); |
| 2231 | } | 2241 | } |
| 2232 | 2242 | ||
| 2233 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2243 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
| @@ -2238,11 +2248,12 @@ static void rcu_idle_count_callbacks_posted(void) | |||
| 2238 | 2248 | ||
| 2239 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | 2249 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) |
| 2240 | { | 2250 | { |
| 2241 | struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); | 2251 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
| 2252 | struct timer_list *tltp = &rdtp->idle_gp_timer; | ||
| 2242 | 2253 | ||
| 2243 | sprintf(cp, "drain=%d %c timer=%lu", | 2254 | sprintf(cp, "drain=%d %c timer=%lu", |
| 2244 | per_cpu(rcu_dyntick_drain, cpu), | 2255 | rdtp->dyntick_drain, |
| 2245 | per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', | 2256 | rdtp->dyntick_holdoff == jiffies ? 'H' : '.', |
| 2246 | timer_pending(tltp) ? tltp->expires - jiffies : -1); | 2257 | timer_pending(tltp) ? tltp->expires - jiffies : -1); |
| 2247 | } | 2258 | } |
| 2248 | 2259 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index f0ec44dcd415..e0c8ffc50d7f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -2127,9 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2127 | else | 2127 | else |
| 2128 | return -EINVAL; | 2128 | return -EINVAL; |
| 2129 | break; | 2129 | break; |
| 2130 | case PR_GET_TID_ADDRESS: | ||
| 2131 | error = prctl_get_tid_address(me, (int __user **)arg2); | ||
| 2132 | break; | ||
| 2133 | default: | 2130 | default: |
| 2134 | return -EINVAL; | 2131 | return -EINVAL; |
| 2135 | } | 2132 | } |
| @@ -2147,6 +2144,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2147 | case PR_SET_MM: | 2144 | case PR_SET_MM: |
| 2148 | error = prctl_set_mm(arg2, arg3, arg4, arg5); | 2145 | error = prctl_set_mm(arg2, arg3, arg4, arg5); |
| 2149 | break; | 2146 | break; |
| 2147 | case PR_GET_TID_ADDRESS: | ||
| 2148 | error = prctl_get_tid_address(me, (int __user **)arg2); | ||
| 2149 | break; | ||
| 2150 | case PR_SET_CHILD_SUBREAPER: | 2150 | case PR_SET_CHILD_SUBREAPER: |
| 2151 | me->signal->is_child_subreaper = !!arg2; | 2151 | me->signal->is_child_subreaper = !!arg2; |
| 2152 | error = 0; | 2152 | error = 0; |
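The sys.c hunks only relocate the PR_GET_TID_ADDRESS case within the prctl() switch; behaviour is unchanged. A minimal user-space caller, purely for illustration (the fallback constant and the error handling are assumptions, not part of the patch):

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_TID_ADDRESS
    #define PR_GET_TID_ADDRESS 40   /* assumed fallback; see linux/prctl.h */
    #endif

    int main(void)
    {
            int *tid_addr = NULL;

            /* Ask the kernel for the clear_child_tid address registered via
             * set_tid_address(2) / CLONE_CHILD_CLEARTID; kernels built
             * without the option return -EINVAL. */
            if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0) != 0)
                    perror("prctl(PR_GET_TID_ADDRESS)");
            else
                    printf("clear_child_tid = %p\n", (void *)tid_addr);
            return 0;
    }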
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da70c6db496c..869997833928 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); | |||
| 274 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | 274 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts) |
| 275 | { | 275 | { |
| 276 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; | 276 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; |
| 277 | unsigned long rcu_delta_jiffies; | ||
| 277 | ktime_t last_update, expires, now; | 278 | ktime_t last_update, expires, now; |
| 278 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 279 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
| 279 | u64 time_delta; | 280 | u64 time_delta; |
| @@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
| 322 | time_delta = timekeeping_max_deferment(); | 323 | time_delta = timekeeping_max_deferment(); |
| 323 | } while (read_seqretry(&xtime_lock, seq)); | 324 | } while (read_seqretry(&xtime_lock, seq)); |
| 324 | 325 | ||
| 325 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || | 326 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || |
| 326 | arch_needs_cpu(cpu)) { | 327 | arch_needs_cpu(cpu)) { |
| 327 | next_jiffies = last_jiffies + 1; | 328 | next_jiffies = last_jiffies + 1; |
| 328 | delta_jiffies = 1; | 329 | delta_jiffies = 1; |
| @@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
| 330 | /* Get the next timer wheel timer */ | 331 | /* Get the next timer wheel timer */ |
| 331 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 332 | next_jiffies = get_next_timer_interrupt(last_jiffies); |
| 332 | delta_jiffies = next_jiffies - last_jiffies; | 333 | delta_jiffies = next_jiffies - last_jiffies; |
| 334 | if (rcu_delta_jiffies < delta_jiffies) { | ||
| 335 | next_jiffies = last_jiffies + rcu_delta_jiffies; | ||
| 336 | delta_jiffies = rcu_delta_jiffies; | ||
| 337 | } | ||
| 333 | } | 338 | } |
| 334 | /* | 339 | /* |
| 335 | * Do not stop the tick, if we are only one off | 340 | * Do not stop the tick, if we are only one off |
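rcu_needs_cpu() now also reports, through its new second argument, how long RCU can tolerate this CPU being tickless; the tick-stop length is clamped to that value. Condensed, illustrative restatement of the resulting calculation:

    if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
        arch_needs_cpu(cpu)) {
            next_jiffies = last_jiffies + 1;        /* keep the tick for now */
            delta_jiffies = 1;
    } else {
            next_jiffies = get_next_timer_interrupt(last_jiffies);
            delta_jiffies = next_jiffies - last_jiffies;
            if (rcu_delta_jiffies < delta_jiffies) {
                    /* RCU needs this CPU before the next timer-wheel event. */
                    next_jiffies = last_jiffies + rcu_delta_jiffies;
                    delta_jiffies = rcu_delta_jiffies;
            }
    }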
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 68032c6177db..49249c28690d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -371,7 +371,7 @@ EXPORT_SYMBOL_GPL(tracing_on); | |||
| 371 | void tracing_off(void) | 371 | void tracing_off(void) |
| 372 | { | 372 | { |
| 373 | if (global_trace.buffer) | 373 | if (global_trace.buffer) |
| 374 | ring_buffer_record_on(global_trace.buffer); | 374 | ring_buffer_record_off(global_trace.buffer); |
| 375 | /* | 375 | /* |
| 376 | * This flag is only looked at when buffers haven't been | 376 | * This flag is only looked at when buffers haven't been |
| 377 | * allocated yet. We don't really care about the race | 377 | * allocated yet. We don't really care about the race |
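This one-liner fixes what looks like a copy-and-paste slip: tracing_off() was re-enabling ring-buffer recording instead of disabling it. A purely illustrative use of the fixed call, not taken from this patch:

    /* Freeze the global trace buffer when a hypothetical bad state is
     * detected, so the events leading up to it survive for inspection. */
    if (unlikely(looks_corrupted))          /* hypothetical predicate */
            tracing_off();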
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index e5e1d85b8c7c..4b1dfba70f7c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -372,6 +372,13 @@ static int watchdog(void *unused) | |||
| 372 | 372 | ||
| 373 | 373 | ||
| 374 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 374 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| 375 | /* | ||
| 376 | * People like the simple clean cpu node info on boot. | ||
| 377 | * Reduce the watchdog noise by only printing messages | ||
| 378 | * that are different from what cpu0 displayed. | ||
| 379 | */ | ||
| 380 | static unsigned long cpu0_err; | ||
| 381 | |||
| 375 | static int watchdog_nmi_enable(int cpu) | 382 | static int watchdog_nmi_enable(int cpu) |
| 376 | { | 383 | { |
| 377 | struct perf_event_attr *wd_attr; | 384 | struct perf_event_attr *wd_attr; |
| @@ -390,11 +397,21 @@ static int watchdog_nmi_enable(int cpu) | |||
| 390 | 397 | ||
| 391 | /* Try to register using hardware perf events */ | 398 | /* Try to register using hardware perf events */ |
| 392 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 399 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); |
| 400 | |||
| 401 | /* save cpu0 error for future comparison */ | ||
| 402 | if (cpu == 0 && IS_ERR(event)) | ||
| 403 | cpu0_err = PTR_ERR(event); | ||
| 404 | |||
| 393 | if (!IS_ERR(event)) { | 405 | if (!IS_ERR(event)) { |
| 394 | pr_info("enabled, takes one hw-pmu counter.\n"); | 406 | /* only print for cpu0 or when the result differs from cpu0's */ |
| 407 | if (cpu == 0 || cpu0_err) | ||
| 408 | pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); | ||
| 395 | goto out_save; | 409 | goto out_save; |
| 396 | } | 410 | } |
| 397 | 411 | ||
| 412 | /* skip displaying the same error again */ | ||
| 413 | if (cpu > 0 && (PTR_ERR(event) == cpu0_err)) | ||
| 414 | return PTR_ERR(event); | ||
| 398 | 415 | ||
| 399 | /* vary the KERN level based on the returned errno */ | 416 | /* vary the KERN level based on the returned errno */ |
| 400 | if (PTR_ERR(event) == -EOPNOTSUPP) | 417 | if (PTR_ERR(event) == -EOPNOTSUPP) |
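The watchdog hunks remember CPU 0's perf-counter creation result in cpu0_err and keep later CPUs quiet unless their outcome differs. Condensed, illustrative restatement (the errno-specific reporting tail of watchdog_nmi_enable() is omitted):

    event = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
                                             watchdog_overflow_callback, NULL);
    if (cpu == 0 && IS_ERR(event))
            cpu0_err = PTR_ERR(event);              /* remember cpu0's failure */

    if (!IS_ERR(event)) {
            if (cpu == 0 || cpu0_err)               /* first success, or a change */
                    pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
            goto out_save;
    }

    if (cpu > 0 && PTR_ERR(event) == cpu0_err)      /* same failure as cpu0 */
            return PTR_ERR(event);                  /* ... so report nothing new */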
