authorAnton Vorontsov <anton.vorontsov@linaro.org>2012-07-31 07:59:42 -0400
committerAnton Vorontsov <anton.vorontsov@linaro.org>2012-07-31 08:16:47 -0400
commite6db06a53b1dcf4e9da4aba143e2eb4d63418abb (patch)
tree10adcecb71c95ce4393c39fa7911d091bcadfe09 /kernel
parentecc2edd56c49fa31a0a9ed15a7bf810ae79d3b85 (diff)
parentc56f5c0342dfee11a1a13d2f5bb7618de5b17590 (diff)
Merge with upstream to accommodate the thermal changes
This merge is performed to take commit c56f5c0342dfee11a1 ("Thermal: Make
Thermal trip points writeable") out of Linus' tree and then fix up the power
supply class. This is needed since the thermal code added a new argument to
thermal_zone_device_register():

  CC      drivers/power/power_supply_core.o
drivers/power/power_supply_core.c: In function ‘psy_register_thermal’:
drivers/power/power_supply_core.c:204:6: warning: passing argument 3 of ‘thermal_zone_device_register’ makes integer from pointer without a cast [enabled by default]
include/linux/thermal.h:154:29: note: expected ‘int’ but argument is of type ‘struct power_supply *’
drivers/power/power_supply_core.c:204:6: error: too few arguments to function ‘thermal_zone_device_register’
include/linux/thermal.h:154:29: note: declared here
make[1]: *** [drivers/power/power_supply_core.o] Error 1
make: *** [drivers/power/] Error 2

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
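For reference, a minimal sketch of the kind of fixup the power supply class needs after this merge: commit c56f5c0342 makes trip points writeable by adding an integer mask as the third parameter of thermal_zone_device_register(), so the registration call in drivers/power/power_supply_core.c has to pass one extra argument. The sketch below is illustrative only, not a hunk from this merge; the zero argument values, the cast, and the psy_tzd_ops and tzd names are assumptions used for the example.

/* Illustrative sketch, not the exact power_supply_core.c change. */
#include <linux/err.h>
#include <linux/power_supply.h>
#include <linux/thermal.h>

/* thermal callbacks (.get_temp, ...); left empty here, filled in by the real driver */
static struct thermal_zone_device_ops psy_tzd_ops;

static int psy_register_thermal(struct power_supply *psy)
{
        /*
         * Arguments: name, trip count, writeable-trip mask (the new third
         * argument), devdata, ops, tc1, tc2, passive delay, polling delay.
         * Without the mask, 'psy' lands in an int slot and the call has too
         * few arguments, as the build log above shows.
         */
        psy->tzd = thermal_zone_device_register((char *)psy->name, 0, 0, psy,
                                                &psy_tzd_ops, 0, 0, 0, 0);
        if (IS_ERR(psy->tzd))
                return PTR_ERR(psy->tzd);
        return 0;
}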
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                  36
-rw-r--r--  kernel/debug/kdb/kdb_main.c      91
-rw-r--r--  kernel/debug/kdb/kdb_private.h    1
-rw-r--r--  kernel/events/core.c             10
-rw-r--r--  kernel/exit.c                    19
-rw-r--r--  kernel/fork.c                    11
-rw-r--r--  kernel/hrtimer.c                 53
-rw-r--r--  kernel/pid_namespace.c           20
-rw-r--r--  kernel/power/hibernate.c          8
-rw-r--r--  kernel/power/user.c               2
-rw-r--r--  kernel/printk.c                 727
-rw-r--r--  kernel/rcutree.c                 15
-rw-r--r--  kernel/rcutree.h                  1
-rw-r--r--  kernel/rcutree_plugin.h          14
-rw-r--r--  kernel/relay.c                    5
-rw-r--r--  kernel/sched/core.c             276
-rw-r--r--  kernel/sched/idle_task.c          1
-rw-r--r--  kernel/sched/sched.h              2
-rw-r--r--  kernel/sys.c                     22
-rw-r--r--  kernel/time/ntp.c                 8
-rw-r--r--  kernel/time/tick-sched.c          2
-rw-r--r--  kernel/time/timekeeping.c        64
-rw-r--r--  kernel/trace/ring_buffer.c        6
-rw-r--r--  kernel/trace/trace.c              6
24 files changed, 1006 insertions, 394 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 72fcd3069a90..b303dfc7dce0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -255,12 +255,17 @@ int cgroup_lock_is_held(void)
255 255
256EXPORT_SYMBOL_GPL(cgroup_lock_is_held); 256EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
257 257
258static int css_unbias_refcnt(int refcnt)
259{
260 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
261}
262
258/* the current nr of refs, always >= 0 whether @css is deactivated or not */ 263/* the current nr of refs, always >= 0 whether @css is deactivated or not */
259static int css_refcnt(struct cgroup_subsys_state *css) 264static int css_refcnt(struct cgroup_subsys_state *css)
260{ 265{
261 int v = atomic_read(&css->refcnt); 266 int v = atomic_read(&css->refcnt);
262 267
263 return v >= 0 ? v : v - CSS_DEACT_BIAS; 268 return css_unbias_refcnt(v);
264} 269}
265 270
266/* convenient tests for these bits */ 271/* convenient tests for these bits */
@@ -896,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
896 mutex_unlock(&cgroup_mutex); 901 mutex_unlock(&cgroup_mutex);
897 902
898 /* 903 /*
899 * We want to drop the active superblock reference from the 904 * Drop the active superblock reference that we took when we
900 * cgroup creation after all the dentry refs are gone - 905 * created the cgroup
901 * kill_sb gets mighty unhappy otherwise. Mark
902 * dentry->d_fsdata with cgroup_diput() to tell
903 * cgroup_d_release() to call deactivate_super().
904 */ 906 */
905 dentry->d_fsdata = cgroup_diput; 907 deactivate_super(cgrp->root->sb);
906 908
907 /* 909 /*
908 * if we're getting rid of the cgroup, refcount should ensure 910 * if we're getting rid of the cgroup, refcount should ensure
@@ -928,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
928 return 1; 930 return 1;
929} 931}
930 932
931static void cgroup_d_release(struct dentry *dentry)
932{
933 /* did cgroup_diput() tell me to deactivate super? */
934 if (dentry->d_fsdata == cgroup_diput)
935 deactivate_super(dentry->d_sb);
936}
937
938static void remove_dir(struct dentry *d) 933static void remove_dir(struct dentry *d)
939{ 934{
940 struct dentry *parent = dget(d->d_parent); 935 struct dentry *parent = dget(d->d_parent);
@@ -1542,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
1542 static const struct dentry_operations cgroup_dops = { 1537 static const struct dentry_operations cgroup_dops = {
1543 .d_iput = cgroup_diput, 1538 .d_iput = cgroup_diput,
1544 .d_delete = cgroup_delete, 1539 .d_delete = cgroup_delete,
1545 .d_release = cgroup_d_release,
1546 }; 1540 };
1547 1541
1548 struct inode *inode = 1542 struct inode *inode =
@@ -3889,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
3889{ 3883{
3890 struct cgroup_subsys_state *css = 3884 struct cgroup_subsys_state *css =
3891 container_of(work, struct cgroup_subsys_state, dput_work); 3885 container_of(work, struct cgroup_subsys_state, dput_work);
3886 struct dentry *dentry = css->cgroup->dentry;
3887 struct super_block *sb = dentry->d_sb;
3892 3888
3893 dput(css->cgroup->dentry); 3889 atomic_inc(&sb->s_active);
3890 dput(dentry);
3891 deactivate_super(sb);
3894} 3892}
3895 3893
3896static void init_cgroup_css(struct cgroup_subsys_state *css, 3894static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -4982,10 +4980,12 @@ EXPORT_SYMBOL_GPL(__css_tryget);
4982void __css_put(struct cgroup_subsys_state *css) 4980void __css_put(struct cgroup_subsys_state *css)
4983{ 4981{
4984 struct cgroup *cgrp = css->cgroup; 4982 struct cgroup *cgrp = css->cgroup;
4983 int v;
4985 4984
4986 rcu_read_lock(); 4985 rcu_read_lock();
4987 atomic_dec(&css->refcnt); 4986 v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
4988 switch (css_refcnt(css)) { 4987
4988 switch (v) {
4989 case 1: 4989 case 1:
4990 if (notify_on_release(cgrp)) { 4990 if (notify_on_release(cgrp)) {
4991 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4991 set_bit(CGRP_RELEASABLE, &cgrp->flags);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 67b847dfa2bb..1f91413edb87 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -14,6 +14,7 @@
14#include <linux/ctype.h> 14#include <linux/ctype.h>
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/kmsg_dump.h>
17#include <linux/reboot.h> 18#include <linux/reboot.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
19#include <linux/sysrq.h> 20#include <linux/sysrq.h>
@@ -2040,8 +2041,15 @@ static int kdb_env(int argc, const char **argv)
2040 */ 2041 */
2041static int kdb_dmesg(int argc, const char **argv) 2042static int kdb_dmesg(int argc, const char **argv)
2042{ 2043{
2043 char *syslog_data[4], *start, *end, c = '\0', *p; 2044 int diag;
2044 int diag, logging, logsize, lines = 0, adjust = 0, n; 2045 int logging;
2046 int lines = 0;
2047 int adjust = 0;
2048 int n = 0;
2049 int skip = 0;
2050 struct kmsg_dumper dumper = { .active = 1 };
2051 size_t len;
2052 char buf[201];
2045 2053
2046 if (argc > 2) 2054 if (argc > 2)
2047 return KDB_ARGCOUNT; 2055 return KDB_ARGCOUNT;
@@ -2064,22 +2072,10 @@ static int kdb_dmesg(int argc, const char **argv)
2064 kdb_set(2, setargs); 2072 kdb_set(2, setargs);
2065 } 2073 }
2066 2074
2067 /* syslog_data[0,1] physical start, end+1. syslog_data[2,3] 2075 kmsg_dump_rewind_nolock(&dumper);
2068 * logical start, end+1. */ 2076 while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
2069 kdb_syslog_data(syslog_data); 2077 n++;
2070 if (syslog_data[2] == syslog_data[3]) 2078
2071 return 0;
2072 logsize = syslog_data[1] - syslog_data[0];
2073 start = syslog_data[2];
2074 end = syslog_data[3];
2075#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
2076 for (n = 0, p = start; p < end; ++p) {
2077 c = *KDB_WRAP(p);
2078 if (c == '\n')
2079 ++n;
2080 }
2081 if (c != '\n')
2082 ++n;
2083 if (lines < 0) { 2079 if (lines < 0) {
2084 if (adjust >= n) 2080 if (adjust >= n)
2085 kdb_printf("buffer only contains %d lines, nothing " 2081 kdb_printf("buffer only contains %d lines, nothing "
@@ -2087,21 +2083,11 @@ static int kdb_dmesg(int argc, const char **argv)
2087 else if (adjust - lines >= n) 2083 else if (adjust - lines >= n)
2088 kdb_printf("buffer only contains %d lines, last %d " 2084 kdb_printf("buffer only contains %d lines, last %d "
2089 "lines printed\n", n, n - adjust); 2085 "lines printed\n", n, n - adjust);
2090 if (adjust) { 2086 skip = adjust;
2091 for (; start < end && adjust; ++start) { 2087 lines = abs(lines);
2092 if (*KDB_WRAP(start) == '\n')
2093 --adjust;
2094 }
2095 if (start < end)
2096 ++start;
2097 }
2098 for (p = start; p < end && lines; ++p) {
2099 if (*KDB_WRAP(p) == '\n')
2100 ++lines;
2101 }
2102 end = p;
2103 } else if (lines > 0) { 2088 } else if (lines > 0) {
2104 int skip = n - (adjust + lines); 2089 skip = n - lines - adjust;
2090 lines = abs(lines);
2105 if (adjust >= n) { 2091 if (adjust >= n) {
2106 kdb_printf("buffer only contains %d lines, " 2092 kdb_printf("buffer only contains %d lines, "
2107 "nothing printed\n", n); 2093 "nothing printed\n", n);
@@ -2112,35 +2098,24 @@ static int kdb_dmesg(int argc, const char **argv)
2112 kdb_printf("buffer only contains %d lines, first " 2098 kdb_printf("buffer only contains %d lines, first "
2113 "%d lines printed\n", n, lines); 2099 "%d lines printed\n", n, lines);
2114 } 2100 }
2115 for (; start < end && skip; ++start) { 2101 } else {
2116 if (*KDB_WRAP(start) == '\n') 2102 lines = n;
2117 --skip;
2118 }
2119 for (p = start; p < end && lines; ++p) {
2120 if (*KDB_WRAP(p) == '\n')
2121 --lines;
2122 }
2123 end = p;
2124 } 2103 }
2125 /* Do a line at a time (max 200 chars) to reduce protocol overhead */ 2104
2126 c = '\n'; 2105 if (skip >= n || skip < 0)
2127 while (start != end) { 2106 return 0;
2128 char buf[201]; 2107
2129 p = buf; 2108 kmsg_dump_rewind_nolock(&dumper);
2130 if (KDB_FLAG(CMD_INTERRUPT)) 2109 while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
2131 return 0; 2110 if (skip) {
2132 while (start < end && (c = *KDB_WRAP(start)) && 2111 skip--;
2133 (p - buf) < sizeof(buf)-1) { 2112 continue;
2134 ++start;
2135 *p++ = c;
2136 if (c == '\n')
2137 break;
2138 } 2113 }
2139 *p = '\0'; 2114 if (!lines--)
2140 kdb_printf("%s", buf); 2115 break;
2116
2117 kdb_printf("%.*s\n", (int)len - 1, buf);
2141 } 2118 }
2142 if (c != '\n')
2143 kdb_printf("\n");
2144 2119
2145 return 0; 2120 return 0;
2146} 2121}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 47c4e56e513b..392ec6a25844 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -205,7 +205,6 @@ extern char kdb_grep_string[];
205extern int kdb_grep_leading; 205extern int kdb_grep_leading;
206extern int kdb_grep_trailing; 206extern int kdb_grep_trailing;
207extern char *kdb_cmds[]; 207extern char *kdb_cmds[];
208extern void kdb_syslog_data(char *syslog_data[]);
209extern unsigned long kdb_task_state_string(const char *); 208extern unsigned long kdb_task_state_string(const char *);
210extern char kdb_task_state_char (const struct task_struct *); 209extern char kdb_task_state_char (const struct task_struct *);
211extern unsigned long kdb_task_state(const struct task_struct *p, 210extern unsigned long kdb_task_state(const struct task_struct *p,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85c0154b333..d7d71d6ec972 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -253,9 +253,9 @@ perf_cgroup_match(struct perf_event *event)
253 return !event->cgrp || event->cgrp == cpuctx->cgrp; 253 return !event->cgrp || event->cgrp == cpuctx->cgrp;
254} 254}
255 255
256static inline void perf_get_cgroup(struct perf_event *event) 256static inline bool perf_tryget_cgroup(struct perf_event *event)
257{ 257{
258 css_get(&event->cgrp->css); 258 return css_tryget(&event->cgrp->css);
259} 259}
260 260
261static inline void perf_put_cgroup(struct perf_event *event) 261static inline void perf_put_cgroup(struct perf_event *event)
@@ -484,7 +484,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
484 event->cgrp = cgrp; 484 event->cgrp = cgrp;
485 485
486 /* must be done before we fput() the file */ 486 /* must be done before we fput() the file */
487 perf_get_cgroup(event); 487 if (!perf_tryget_cgroup(event)) {
488 event->cgrp = NULL;
489 ret = -ENOENT;
490 goto out;
491 }
488 492
489 /* 493 /*
490 * all events in a group must monitor 494 * all events in a group must monitor
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..2f59cc334516 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
72 list_del_rcu(&p->tasks); 72 list_del_rcu(&p->tasks);
73 list_del_init(&p->sibling); 73 list_del_init(&p->sibling);
74 __this_cpu_dec(process_counts); 74 __this_cpu_dec(process_counts);
75 /*
76 * If we are the last child process in a pid namespace to be
77 * reaped, notify the reaper sleeping zap_pid_ns_processes().
78 */
79 if (IS_ENABLED(CONFIG_PID_NS)) {
80 struct task_struct *parent = p->real_parent;
81
82 if ((task_active_pid_ns(parent)->child_reaper == parent) &&
83 list_empty(&parent->children) &&
84 (parent->flags & PF_EXITING))
85 wake_up_process(parent);
86 }
75 } 87 }
76 list_del_rcu(&p->thread_group); 88 list_del_rcu(&p->thread_group);
77} 89}
@@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk)
643 mm_release(tsk, mm); 655 mm_release(tsk, mm);
644 if (!mm) 656 if (!mm)
645 return; 657 return;
658 sync_mm_rss(mm);
646 /* 659 /*
647 * Serialize with any possible pending coredump. 660 * Serialize with any possible pending coredump.
648 * We must hold mmap_sem around checking core_state 661 * We must hold mmap_sem around checking core_state
@@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
719 732
720 zap_pid_ns_processes(pid_ns); 733 zap_pid_ns_processes(pid_ns);
721 write_lock_irq(&tasklist_lock); 734 write_lock_irq(&tasklist_lock);
722 /*
723 * We can not clear ->child_reaper or leave it alone.
724 * There may by stealth EXIT_DEAD tasks on ->children,
725 * forget_original_parent() must move them somewhere.
726 */
727 pid_ns->child_reaper = init_pid_ns.child_reaper;
728 } else if (father->signal->has_child_subreaper) { 735 } else if (father->signal->has_child_subreaper) {
729 struct task_struct *reaper; 736 struct task_struct *reaper;
730 737
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..f00e319d8376 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
304 } 304 }
305 305
306 err = arch_dup_task_struct(tsk, orig); 306 err = arch_dup_task_struct(tsk, orig);
307 if (err)
308 goto out;
309 307
308 /*
309 * We defer looking at err, because we will need this setup
310 * for the clean up path to work correctly.
311 */
310 tsk->stack = ti; 312 tsk->stack = ti;
311
312 setup_thread_stack(tsk, orig); 313 setup_thread_stack(tsk, orig);
314
315 if (err)
316 goto out;
317
313 clear_user_return_notifier(tsk); 318 clear_user_return_notifier(tsk);
314 clear_tsk_need_resched(tsk); 319 clear_tsk_need_resched(tsk);
315 stackend = end_of_stack(tsk); 320 stackend = end_of_stack(tsk);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf51682b..6db7a5ed52b5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
657 return 0; 657 return 0;
658} 658}
659 659
660static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
661{
662 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
663 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
664
665 return ktime_get_update_offsets(offs_real, offs_boot);
666}
667
660/* 668/*
661 * Retrigger next event is called after clock was set 669 * Retrigger next event is called after clock was set
662 * 670 *
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
665static void retrigger_next_event(void *arg) 673static void retrigger_next_event(void *arg)
666{ 674{
667 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); 675 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
668 struct timespec realtime_offset, xtim, wtm, sleep;
669 676
670 if (!hrtimer_hres_active()) 677 if (!hrtimer_hres_active())
671 return; 678 return;
672 679
673 /* Optimized out for !HIGH_RES */
674 get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
675 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
676
677 /* Adjust CLOCK_REALTIME offset */
678 raw_spin_lock(&base->lock); 680 raw_spin_lock(&base->lock);
679 base->clock_base[HRTIMER_BASE_REALTIME].offset = 681 hrtimer_update_base(base);
680 timespec_to_ktime(realtime_offset);
681 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
682 timespec_to_ktime(sleep);
683
684 hrtimer_force_reprogram(base, 0); 682 hrtimer_force_reprogram(base, 0);
685 raw_spin_unlock(&base->lock); 683 raw_spin_unlock(&base->lock);
686} 684}
@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
710 base->clock_base[i].resolution = KTIME_HIGH_RES; 708 base->clock_base[i].resolution = KTIME_HIGH_RES;
711 709
712 tick_setup_sched_timer(); 710 tick_setup_sched_timer();
713
714 /* "Retrigger" the interrupt to get things going */ 711 /* "Retrigger" the interrupt to get things going */
715 retrigger_next_event(NULL); 712 retrigger_next_event(NULL);
716 local_irq_restore(flags); 713 local_irq_restore(flags);
717 return 1; 714 return 1;
718} 715}
719 716
717/*
718 * Called from timekeeping code to reprogramm the hrtimer interrupt
719 * device. If called from the timer interrupt context we defer it to
720 * softirq context.
721 */
722void clock_was_set_delayed(void)
723{
724 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
725
726 cpu_base->clock_was_set = 1;
727 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
728}
729
720#else 730#else
721 731
722static inline int hrtimer_hres_active(void) { return 0; } 732static inline int hrtimer_hres_active(void) { return 0; }
@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1250 cpu_base->nr_events++; 1260 cpu_base->nr_events++;
1251 dev->next_event.tv64 = KTIME_MAX; 1261 dev->next_event.tv64 = KTIME_MAX;
1252 1262
1253 entry_time = now = ktime_get(); 1263 raw_spin_lock(&cpu_base->lock);
1264 entry_time = now = hrtimer_update_base(cpu_base);
1254retry: 1265retry:
1255 expires_next.tv64 = KTIME_MAX; 1266 expires_next.tv64 = KTIME_MAX;
1256
1257 raw_spin_lock(&cpu_base->lock);
1258 /* 1267 /*
1259 * We set expires_next to KTIME_MAX here with cpu_base->lock 1268 * We set expires_next to KTIME_MAX here with cpu_base->lock
1260 * held to prevent that a timer is enqueued in our queue via 1269 * held to prevent that a timer is enqueued in our queue via
@@ -1330,8 +1339,12 @@ retry:
1330 * We need to prevent that we loop forever in the hrtimer 1339 * We need to prevent that we loop forever in the hrtimer
1331 * interrupt routine. We give it 3 attempts to avoid 1340 * interrupt routine. We give it 3 attempts to avoid
1332 * overreacting on some spurious event. 1341 * overreacting on some spurious event.
1342 *
1343 * Acquire base lock for updating the offsets and retrieving
1344 * the current time.
1333 */ 1345 */
1334 now = ktime_get(); 1346 raw_spin_lock(&cpu_base->lock);
1347 now = hrtimer_update_base(cpu_base);
1335 cpu_base->nr_retries++; 1348 cpu_base->nr_retries++;
1336 if (++retries < 3) 1349 if (++retries < 3)
1337 goto retry; 1350 goto retry;
@@ -1343,6 +1356,7 @@ retry:
1343 */ 1356 */
1344 cpu_base->nr_hangs++; 1357 cpu_base->nr_hangs++;
1345 cpu_base->hang_detected = 1; 1358 cpu_base->hang_detected = 1;
1359 raw_spin_unlock(&cpu_base->lock);
1346 delta = ktime_sub(now, entry_time); 1360 delta = ktime_sub(now, entry_time);
1347 if (delta.tv64 > cpu_base->max_hang_time.tv64) 1361 if (delta.tv64 > cpu_base->max_hang_time.tv64)
1348 cpu_base->max_hang_time = delta; 1362 cpu_base->max_hang_time = delta;
@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void)
1395 1409
1396static void run_hrtimer_softirq(struct softirq_action *h) 1410static void run_hrtimer_softirq(struct softirq_action *h)
1397{ 1411{
1412 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1413
1414 if (cpu_base->clock_was_set) {
1415 cpu_base->clock_was_set = 0;
1416 clock_was_set();
1417 }
1418
1398 hrtimer_peek_ahead_timers(); 1419 hrtimer_peek_ahead_timers();
1399} 1420}
1400 1421
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 16b20e38c4a1..b3c7fd554250 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
184 } 184 }
185 read_unlock(&tasklist_lock); 185 read_unlock(&tasklist_lock);
186 186
187 /* Firstly reap the EXIT_ZOMBIE children we may have. */
187 do { 188 do {
188 clear_thread_flag(TIF_SIGPENDING); 189 clear_thread_flag(TIF_SIGPENDING);
189 rc = sys_wait4(-1, NULL, __WALL, NULL); 190 rc = sys_wait4(-1, NULL, __WALL, NULL);
190 } while (rc != -ECHILD); 191 } while (rc != -ECHILD);
191 192
193 /*
194 * sys_wait4() above can't reap the TASK_DEAD children.
195 * Make sure they all go away, see __unhash_process().
196 */
197 for (;;) {
198 bool need_wait = false;
199
200 read_lock(&tasklist_lock);
201 if (!list_empty(&current->children)) {
202 __set_current_state(TASK_UNINTERRUPTIBLE);
203 need_wait = true;
204 }
205 read_unlock(&tasklist_lock);
206
207 if (!need_wait)
208 break;
209 schedule();
210 }
211
192 if (pid_ns->reboot) 212 if (pid_ns->reboot)
193 current->signal->group_exit_code = pid_ns->reboot; 213 current->signal->group_exit_code = pid_ns->reboot;
194 214
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8b53db38a279..238025f5472e 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -27,7 +27,6 @@
27#include <linux/syscore_ops.h> 27#include <linux/syscore_ops.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/genhd.h> 29#include <linux/genhd.h>
30#include <scsi/scsi_scan.h>
31 30
32#include "power.h" 31#include "power.h"
33 32
@@ -748,13 +747,6 @@ static int software_resume(void)
748 async_synchronize_full(); 747 async_synchronize_full();
749 } 748 }
750 749
751 /*
752 * We can't depend on SCSI devices being available after loading
753 * one of their modules until scsi_complete_async_scans() is
754 * called and the resume device usually is a SCSI one.
755 */
756 scsi_complete_async_scans();
757
758 swsusp_resume_device = name_to_dev_t(resume_file); 750 swsusp_resume_device = name_to_dev_t(resume_file);
759 if (!swsusp_resume_device) { 751 if (!swsusp_resume_device) {
760 error = -ENODEV; 752 error = -ENODEV;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 91b0fd021a95..4ed81e74f86f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -24,7 +24,6 @@
24#include <linux/console.h> 24#include <linux/console.h>
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <scsi/scsi_scan.h>
28 27
29#include <asm/uaccess.h> 28#include <asm/uaccess.h>
30 29
@@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
84 * appear. 83 * appear.
85 */ 84 */
86 wait_for_device_probe(); 85 wait_for_device_probe();
87 scsi_complete_async_scans();
88 86
89 data->swap = -1; 87 data->swap = -1;
90 data->mode = O_WRONLY; 88 data->mode = O_WRONLY;
diff --git a/kernel/printk.c b/kernel/printk.c
index 32462d2b364a..ac4bc9e79465 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -193,12 +193,21 @@ static int console_may_schedule;
193 * separated by ',', and find the message after the ';' character. 193 * separated by ',', and find the message after the ';' character.
194 */ 194 */
195 195
196enum log_flags {
197 LOG_NOCONS = 1, /* already flushed, do not print to console */
198 LOG_NEWLINE = 2, /* text ended with a newline */
199 LOG_PREFIX = 4, /* text started with a prefix */
200 LOG_CONT = 8, /* text is a fragment of a continuation line */
201};
202
196struct log { 203struct log {
197 u64 ts_nsec; /* timestamp in nanoseconds */ 204 u64 ts_nsec; /* timestamp in nanoseconds */
198 u16 len; /* length of entire record */ 205 u16 len; /* length of entire record */
199 u16 text_len; /* length of text buffer */ 206 u16 text_len; /* length of text buffer */
200 u16 dict_len; /* length of dictionary buffer */ 207 u16 dict_len; /* length of dictionary buffer */
201 u16 level; /* syslog level + facility */ 208 u8 facility; /* syslog facility */
209 u8 flags:5; /* internal record flags */
210 u8 level:3; /* syslog level */
202}; 211};
203 212
204/* 213/*
@@ -210,6 +219,8 @@ static DEFINE_RAW_SPINLOCK(logbuf_lock);
210/* the next printk record to read by syslog(READ) or /proc/kmsg */ 219/* the next printk record to read by syslog(READ) or /proc/kmsg */
211static u64 syslog_seq; 220static u64 syslog_seq;
212static u32 syslog_idx; 221static u32 syslog_idx;
222static enum log_flags syslog_prev;
223static size_t syslog_partial;
213 224
214/* index and sequence number of the first record stored in the buffer */ 225/* index and sequence number of the first record stored in the buffer */
215static u64 log_first_seq; 226static u64 log_first_seq;
@@ -227,10 +238,10 @@ static u32 clear_idx;
227#define LOG_LINE_MAX 1024 238#define LOG_LINE_MAX 1024
228 239
229/* record buffer */ 240/* record buffer */
230#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 241#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
231#define LOG_ALIGN 4 242#define LOG_ALIGN 4
232#else 243#else
233#define LOG_ALIGN 8 244#define LOG_ALIGN __alignof__(struct log)
234#endif 245#endif
235#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 246#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
236static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 247static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -286,6 +297,7 @@ static u32 log_next(u32 idx)
286 297
287/* insert record into the buffer, discard old ones, update heads */ 298/* insert record into the buffer, discard old ones, update heads */
288static void log_store(int facility, int level, 299static void log_store(int facility, int level,
300 enum log_flags flags, u64 ts_nsec,
289 const char *dict, u16 dict_len, 301 const char *dict, u16 dict_len,
290 const char *text, u16 text_len) 302 const char *text, u16 text_len)
291{ 303{
@@ -329,8 +341,13 @@ static void log_store(int facility, int level,
329 msg->text_len = text_len; 341 msg->text_len = text_len;
330 memcpy(log_dict(msg), dict, dict_len); 342 memcpy(log_dict(msg), dict, dict_len);
331 msg->dict_len = dict_len; 343 msg->dict_len = dict_len;
332 msg->level = (facility << 3) | (level & 7); 344 msg->facility = facility;
333 msg->ts_nsec = local_clock(); 345 msg->level = level & 7;
346 msg->flags = flags & 0x1f;
347 if (ts_nsec > 0)
348 msg->ts_nsec = ts_nsec;
349 else
350 msg->ts_nsec = local_clock();
334 memset(log_dict(msg) + dict_len, 0, pad_len); 351 memset(log_dict(msg) + dict_len, 0, pad_len);
335 msg->len = sizeof(struct log) + text_len + dict_len + pad_len; 352 msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
336 353
@@ -414,21 +431,23 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
414 if (!user) 431 if (!user)
415 return -EBADF; 432 return -EBADF;
416 433
417 mutex_lock(&user->lock); 434 ret = mutex_lock_interruptible(&user->lock);
418 raw_spin_lock(&logbuf_lock); 435 if (ret)
436 return ret;
437 raw_spin_lock_irq(&logbuf_lock);
419 while (user->seq == log_next_seq) { 438 while (user->seq == log_next_seq) {
420 if (file->f_flags & O_NONBLOCK) { 439 if (file->f_flags & O_NONBLOCK) {
421 ret = -EAGAIN; 440 ret = -EAGAIN;
422 raw_spin_unlock(&logbuf_lock); 441 raw_spin_unlock_irq(&logbuf_lock);
423 goto out; 442 goto out;
424 } 443 }
425 444
426 raw_spin_unlock(&logbuf_lock); 445 raw_spin_unlock_irq(&logbuf_lock);
427 ret = wait_event_interruptible(log_wait, 446 ret = wait_event_interruptible(log_wait,
428 user->seq != log_next_seq); 447 user->seq != log_next_seq);
429 if (ret) 448 if (ret)
430 goto out; 449 goto out;
431 raw_spin_lock(&logbuf_lock); 450 raw_spin_lock_irq(&logbuf_lock);
432 } 451 }
433 452
434 if (user->seq < log_first_seq) { 453 if (user->seq < log_first_seq) {
@@ -436,7 +455,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
436 user->idx = log_first_idx; 455 user->idx = log_first_idx;
437 user->seq = log_first_seq; 456 user->seq = log_first_seq;
438 ret = -EPIPE; 457 ret = -EPIPE;
439 raw_spin_unlock(&logbuf_lock); 458 raw_spin_unlock_irq(&logbuf_lock);
440 goto out; 459 goto out;
441 } 460 }
442 461
@@ -444,13 +463,13 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
444 ts_usec = msg->ts_nsec; 463 ts_usec = msg->ts_nsec;
445 do_div(ts_usec, 1000); 464 do_div(ts_usec, 1000);
446 len = sprintf(user->buf, "%u,%llu,%llu;", 465 len = sprintf(user->buf, "%u,%llu,%llu;",
447 msg->level, user->seq, ts_usec); 466 (msg->facility << 3) | msg->level, user->seq, ts_usec);
448 467
449 /* escape non-printable characters */ 468 /* escape non-printable characters */
450 for (i = 0; i < msg->text_len; i++) { 469 for (i = 0; i < msg->text_len; i++) {
451 unsigned char c = log_text(msg)[i]; 470 unsigned char c = log_text(msg)[i];
452 471
453 if (c < ' ' || c >= 128) 472 if (c < ' ' || c >= 127 || c == '\\')
454 len += sprintf(user->buf + len, "\\x%02x", c); 473 len += sprintf(user->buf + len, "\\x%02x", c);
455 else 474 else
456 user->buf[len++] = c; 475 user->buf[len++] = c;
@@ -474,7 +493,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
474 continue; 493 continue;
475 } 494 }
476 495
477 if (c < ' ' || c >= 128) { 496 if (c < ' ' || c >= 127 || c == '\\') {
478 len += sprintf(user->buf + len, "\\x%02x", c); 497 len += sprintf(user->buf + len, "\\x%02x", c);
479 continue; 498 continue;
480 } 499 }
@@ -486,7 +505,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
486 505
487 user->idx = log_next(user->idx); 506 user->idx = log_next(user->idx);
488 user->seq++; 507 user->seq++;
489 raw_spin_unlock(&logbuf_lock); 508 raw_spin_unlock_irq(&logbuf_lock);
490 509
491 if (len > count) { 510 if (len > count) {
492 ret = -EINVAL; 511 ret = -EINVAL;
@@ -513,7 +532,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
513 if (offset) 532 if (offset)
514 return -ESPIPE; 533 return -ESPIPE;
515 534
516 raw_spin_lock(&logbuf_lock); 535 raw_spin_lock_irq(&logbuf_lock);
517 switch (whence) { 536 switch (whence) {
518 case SEEK_SET: 537 case SEEK_SET:
519 /* the first record */ 538 /* the first record */
@@ -537,7 +556,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
537 default: 556 default:
538 ret = -EINVAL; 557 ret = -EINVAL;
539 } 558 }
540 raw_spin_unlock(&logbuf_lock); 559 raw_spin_unlock_irq(&logbuf_lock);
541 return ret; 560 return ret;
542} 561}
543 562
@@ -551,14 +570,14 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
551 570
552 poll_wait(file, &log_wait, wait); 571 poll_wait(file, &log_wait, wait);
553 572
554 raw_spin_lock(&logbuf_lock); 573 raw_spin_lock_irq(&logbuf_lock);
555 if (user->seq < log_next_seq) { 574 if (user->seq < log_next_seq) {
556 /* return error when data has vanished underneath us */ 575 /* return error when data has vanished underneath us */
557 if (user->seq < log_first_seq) 576 if (user->seq < log_first_seq)
558 ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; 577 ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
559 ret = POLLIN|POLLRDNORM; 578 ret = POLLIN|POLLRDNORM;
560 } 579 }
561 raw_spin_unlock(&logbuf_lock); 580 raw_spin_unlock_irq(&logbuf_lock);
562 581
563 return ret; 582 return ret;
564} 583}
@@ -582,10 +601,10 @@ static int devkmsg_open(struct inode *inode, struct file *file)
582 601
583 mutex_init(&user->lock); 602 mutex_init(&user->lock);
584 603
585 raw_spin_lock(&logbuf_lock); 604 raw_spin_lock_irq(&logbuf_lock);
586 user->idx = log_first_idx; 605 user->idx = log_first_idx;
587 user->seq = log_first_seq; 606 user->seq = log_first_seq;
588 raw_spin_unlock(&logbuf_lock); 607 raw_spin_unlock_irq(&logbuf_lock);
589 608
590 file->private_data = user; 609 file->private_data = user;
591 return 0; 610 return 0;
@@ -785,44 +804,64 @@ static bool printk_time;
785#endif 804#endif
786module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 805module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
787 806
807static size_t print_time(u64 ts, char *buf)
808{
809 unsigned long rem_nsec;
810
811 if (!printk_time)
812 return 0;
813
814 if (!buf)
815 return 15;
816
817 rem_nsec = do_div(ts, 1000000000);
818 return sprintf(buf, "[%5lu.%06lu] ",
819 (unsigned long)ts, rem_nsec / 1000);
820}
821
788static size_t print_prefix(const struct log *msg, bool syslog, char *buf) 822static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
789{ 823{
790 size_t len = 0; 824 size_t len = 0;
825 unsigned int prefix = (msg->facility << 3) | msg->level;
791 826
792 if (syslog) { 827 if (syslog) {
793 if (buf) { 828 if (buf) {
794 len += sprintf(buf, "<%u>", msg->level); 829 len += sprintf(buf, "<%u>", prefix);
795 } else { 830 } else {
796 len += 3; 831 len += 3;
797 if (msg->level > 9) 832 if (prefix > 999)
798 len++; 833 len += 3;
799 if (msg->level > 99) 834 else if (prefix > 99)
835 len += 2;
836 else if (prefix > 9)
800 len++; 837 len++;
801 } 838 }
802 } 839 }
803 840
804 if (printk_time) { 841 len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
805 if (buf) {
806 unsigned long long ts = msg->ts_nsec;
807 unsigned long rem_nsec = do_div(ts, 1000000000);
808
809 len += sprintf(buf + len, "[%5lu.%06lu] ",
810 (unsigned long) ts, rem_nsec / 1000);
811 } else {
812 len += 15;
813 }
814 }
815
816 return len; 842 return len;
817} 843}
818 844
819static size_t msg_print_text(const struct log *msg, bool syslog, 845static size_t msg_print_text(const struct log *msg, enum log_flags prev,
820 char *buf, size_t size) 846 bool syslog, char *buf, size_t size)
821{ 847{
822 const char *text = log_text(msg); 848 const char *text = log_text(msg);
823 size_t text_size = msg->text_len; 849 size_t text_size = msg->text_len;
850 bool prefix = true;
851 bool newline = true;
824 size_t len = 0; 852 size_t len = 0;
825 853
854 if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
855 prefix = false;
856
857 if (msg->flags & LOG_CONT) {
858 if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
859 prefix = false;
860
861 if (!(msg->flags & LOG_NEWLINE))
862 newline = false;
863 }
864
826 do { 865 do {
827 const char *next = memchr(text, '\n', text_size); 866 const char *next = memchr(text, '\n', text_size);
828 size_t text_len; 867 size_t text_len;
@@ -840,16 +879,22 @@ static size_t msg_print_text(const struct log *msg, bool syslog,
840 text_len + 1>= size - len) 879 text_len + 1>= size - len)
841 break; 880 break;
842 881
843 len += print_prefix(msg, syslog, buf + len); 882 if (prefix)
883 len += print_prefix(msg, syslog, buf + len);
844 memcpy(buf + len, text, text_len); 884 memcpy(buf + len, text, text_len);
845 len += text_len; 885 len += text_len;
846 buf[len++] = '\n'; 886 if (next || newline)
887 buf[len++] = '\n';
847 } else { 888 } else {
848 /* SYSLOG_ACTION_* buffer size only calculation */ 889 /* SYSLOG_ACTION_* buffer size only calculation */
849 len += print_prefix(msg, syslog, NULL); 890 if (prefix)
850 len += text_len + 1; 891 len += print_prefix(msg, syslog, NULL);
892 len += text_len;
893 if (next || newline)
894 len++;
851 } 895 }
852 896
897 prefix = true;
853 text = next; 898 text = next;
854 } while (text); 899 } while (text);
855 900
@@ -860,26 +905,60 @@ static int syslog_print(char __user *buf, int size)
860{ 905{
861 char *text; 906 char *text;
862 struct log *msg; 907 struct log *msg;
863 int len; 908 int len = 0;
864 909
865 text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); 910 text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
866 if (!text) 911 if (!text)
867 return -ENOMEM; 912 return -ENOMEM;
868 913
869 raw_spin_lock_irq(&logbuf_lock); 914 while (size > 0) {
870 if (syslog_seq < log_first_seq) { 915 size_t n;
871 /* messages are gone, move to first one */ 916 size_t skip;
872 syslog_seq = log_first_seq;
873 syslog_idx = log_first_idx;
874 }
875 msg = log_from_idx(syslog_idx);
876 len = msg_print_text(msg, true, text, LOG_LINE_MAX);
877 syslog_idx = log_next(syslog_idx);
878 syslog_seq++;
879 raw_spin_unlock_irq(&logbuf_lock);
880 917
881 if (len > 0 && copy_to_user(buf, text, len)) 918 raw_spin_lock_irq(&logbuf_lock);
882 len = -EFAULT; 919 if (syslog_seq < log_first_seq) {
920 /* messages are gone, move to first one */
921 syslog_seq = log_first_seq;
922 syslog_idx = log_first_idx;
923 syslog_prev = 0;
924 syslog_partial = 0;
925 }
926 if (syslog_seq == log_next_seq) {
927 raw_spin_unlock_irq(&logbuf_lock);
928 break;
929 }
930
931 skip = syslog_partial;
932 msg = log_from_idx(syslog_idx);
933 n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX);
934 if (n - syslog_partial <= size) {
935 /* message fits into buffer, move forward */
936 syslog_idx = log_next(syslog_idx);
937 syslog_seq++;
938 syslog_prev = msg->flags;
939 n -= syslog_partial;
940 syslog_partial = 0;
941 } else if (!len){
942 /* partial read(), remember position */
943 n = size;
944 syslog_partial += n;
945 } else
946 n = 0;
947 raw_spin_unlock_irq(&logbuf_lock);
948
949 if (!n)
950 break;
951
952 if (copy_to_user(buf, text + skip, n)) {
953 if (!len)
954 len = -EFAULT;
955 break;
956 }
957
958 len += n;
959 size -= n;
960 buf += n;
961 }
883 962
884 kfree(text); 963 kfree(text);
885 return len; 964 return len;
@@ -899,6 +978,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
899 u64 next_seq; 978 u64 next_seq;
900 u64 seq; 979 u64 seq;
901 u32 idx; 980 u32 idx;
981 enum log_flags prev;
902 982
903 if (clear_seq < log_first_seq) { 983 if (clear_seq < log_first_seq) {
904 /* messages are gone, move to first available one */ 984 /* messages are gone, move to first available one */
@@ -909,41 +989,47 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
909 /* 989 /*
910 * Find first record that fits, including all following records, 990 * Find first record that fits, including all following records,
911 * into the user-provided buffer for this dump. 991 * into the user-provided buffer for this dump.
912 */ 992 */
913 seq = clear_seq; 993 seq = clear_seq;
914 idx = clear_idx; 994 idx = clear_idx;
995 prev = 0;
915 while (seq < log_next_seq) { 996 while (seq < log_next_seq) {
916 struct log *msg = log_from_idx(idx); 997 struct log *msg = log_from_idx(idx);
917 998
918 len += msg_print_text(msg, true, NULL, 0); 999 len += msg_print_text(msg, prev, true, NULL, 0);
919 idx = log_next(idx); 1000 idx = log_next(idx);
920 seq++; 1001 seq++;
921 } 1002 }
1003
1004 /* move first record forward until length fits into the buffer */
922 seq = clear_seq; 1005 seq = clear_seq;
923 idx = clear_idx; 1006 idx = clear_idx;
1007 prev = 0;
924 while (len > size && seq < log_next_seq) { 1008 while (len > size && seq < log_next_seq) {
925 struct log *msg = log_from_idx(idx); 1009 struct log *msg = log_from_idx(idx);
926 1010
927 len -= msg_print_text(msg, true, NULL, 0); 1011 len -= msg_print_text(msg, prev, true, NULL, 0);
928 idx = log_next(idx); 1012 idx = log_next(idx);
929 seq++; 1013 seq++;
930 } 1014 }
931 1015
932 /* last message in this dump */ 1016 /* last message fitting into this dump */
933 next_seq = log_next_seq; 1017 next_seq = log_next_seq;
934 1018
935 len = 0; 1019 len = 0;
1020 prev = 0;
936 while (len >= 0 && seq < next_seq) { 1021 while (len >= 0 && seq < next_seq) {
937 struct log *msg = log_from_idx(idx); 1022 struct log *msg = log_from_idx(idx);
938 int textlen; 1023 int textlen;
939 1024
940 textlen = msg_print_text(msg, true, text, LOG_LINE_MAX); 1025 textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX);
941 if (textlen < 0) { 1026 if (textlen < 0) {
942 len = textlen; 1027 len = textlen;
943 break; 1028 break;
944 } 1029 }
945 idx = log_next(idx); 1030 idx = log_next(idx);
946 seq++; 1031 seq++;
1032 prev = msg->flags;
947 1033
948 raw_spin_unlock_irq(&logbuf_lock); 1034 raw_spin_unlock_irq(&logbuf_lock);
949 if (copy_to_user(buf + len, text, textlen)) 1035 if (copy_to_user(buf + len, text, textlen))
@@ -956,6 +1042,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
956 /* messages are gone, move to next one */ 1042 /* messages are gone, move to next one */
957 seq = log_first_seq; 1043 seq = log_first_seq;
958 idx = log_first_idx; 1044 idx = log_first_idx;
1045 prev = 0;
959 } 1046 }
960 } 1047 }
961 } 1048 }
@@ -1027,6 +1114,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1027 /* Clear ring buffer */ 1114 /* Clear ring buffer */
1028 case SYSLOG_ACTION_CLEAR: 1115 case SYSLOG_ACTION_CLEAR:
1029 syslog_print_all(NULL, 0, true); 1116 syslog_print_all(NULL, 0, true);
1117 break;
1030 /* Disable logging to console */ 1118 /* Disable logging to console */
1031 case SYSLOG_ACTION_CONSOLE_OFF: 1119 case SYSLOG_ACTION_CONSOLE_OFF:
1032 if (saved_console_loglevel == -1) 1120 if (saved_console_loglevel == -1)
@@ -1059,6 +1147,8 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1059 /* messages are gone, move to first one */ 1147 /* messages are gone, move to first one */
1060 syslog_seq = log_first_seq; 1148 syslog_seq = log_first_seq;
1061 syslog_idx = log_first_idx; 1149 syslog_idx = log_first_idx;
1150 syslog_prev = 0;
1151 syslog_partial = 0;
1062 } 1152 }
1063 if (from_file) { 1153 if (from_file) {
1064 /* 1154 /*
@@ -1068,19 +1158,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1068 */ 1158 */
1069 error = log_next_idx - syslog_idx; 1159 error = log_next_idx - syslog_idx;
1070 } else { 1160 } else {
1071 u64 seq; 1161 u64 seq = syslog_seq;
1072 u32 idx; 1162 u32 idx = syslog_idx;
1163 enum log_flags prev = syslog_prev;
1073 1164
1074 error = 0; 1165 error = 0;
1075 seq = syslog_seq;
1076 idx = syslog_idx;
1077 while (seq < log_next_seq) { 1166 while (seq < log_next_seq) {
1078 struct log *msg = log_from_idx(idx); 1167 struct log *msg = log_from_idx(idx);
1079 1168
1080 error += msg_print_text(msg, true, NULL, 0); 1169 error += msg_print_text(msg, prev, true, NULL, 0);
1081 idx = log_next(idx); 1170 idx = log_next(idx);
1082 seq++; 1171 seq++;
1172 prev = msg->flags;
1083 } 1173 }
1174 error -= syslog_partial;
1084 } 1175 }
1085 raw_spin_unlock_irq(&logbuf_lock); 1176 raw_spin_unlock_irq(&logbuf_lock);
1086 break; 1177 break;
@@ -1101,21 +1192,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
1101 return do_syslog(type, buf, len, SYSLOG_FROM_CALL); 1192 return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
1102} 1193}
1103 1194
1104#ifdef CONFIG_KGDB_KDB
1105/* kdb dmesg command needs access to the syslog buffer. do_syslog()
1106 * uses locks so it cannot be used during debugging. Just tell kdb
1107 * where the start and end of the physical and logical logs are. This
1108 * is equivalent to do_syslog(3).
1109 */
1110void kdb_syslog_data(char *syslog_data[4])
1111{
1112 syslog_data[0] = log_buf;
1113 syslog_data[1] = log_buf + log_buf_len;
1114 syslog_data[2] = log_buf + log_first_idx;
1115 syslog_data[3] = log_buf + log_next_idx;
1116}
1117#endif /* CONFIG_KGDB_KDB */
1118
1119static bool __read_mostly ignore_loglevel; 1195static bool __read_mostly ignore_loglevel;
1120 1196
1121static int __init ignore_loglevel_setup(char *str) 1197static int __init ignore_loglevel_setup(char *str)
@@ -1259,22 +1335,98 @@ static inline void printk_delay(void)
1259 } 1335 }
1260} 1336}
1261 1337
1338/*
1339 * Continuation lines are buffered, and not committed to the record buffer
1340 * until the line is complete, or a race forces it. The line fragments
1341 * though, are printed immediately to the consoles to ensure everything has
1342 * reached the console in case of a kernel crash.
1343 */
1344static struct cont {
1345 char buf[LOG_LINE_MAX];
1346 size_t len; /* length == 0 means unused buffer */
1347 size_t cons; /* bytes written to console */
1348 struct task_struct *owner; /* task of first print*/
1349 u64 ts_nsec; /* time of first print */
1350 u8 level; /* log level of first message */
1351 u8 facility; /* log level of first message */
1352 bool flushed:1; /* buffer sealed and committed */
1353} cont;
1354
1355static void cont_flush(void)
1356{
1357 if (cont.flushed)
1358 return;
1359 if (cont.len == 0)
1360 return;
1361
1362 log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
1363 NULL, 0, cont.buf, cont.len);
1364
1365 cont.flushed = true;
1366}
1367
1368static bool cont_add(int facility, int level, const char *text, size_t len)
1369{
1370 if (cont.len && cont.flushed)
1371 return false;
1372
1373 if (cont.len + len > sizeof(cont.buf)) {
1374 cont_flush();
1375 return false;
1376 }
1377
1378 if (!cont.len) {
1379 cont.facility = facility;
1380 cont.level = level;
1381 cont.owner = current;
1382 cont.ts_nsec = local_clock();
1383 cont.cons = 0;
1384 cont.flushed = false;
1385 }
1386
1387 memcpy(cont.buf + cont.len, text, len);
1388 cont.len += len;
1389 return true;
1390}
1391
1392static size_t cont_print_text(char *text, size_t size)
1393{
1394 size_t textlen = 0;
1395 size_t len;
1396
1397 if (cont.cons == 0) {
1398 textlen += print_time(cont.ts_nsec, text);
1399 size -= textlen;
1400 }
1401
1402 len = cont.len - cont.cons;
1403 if (len > 0) {
1404 if (len+1 > size)
1405 len = size-1;
1406 memcpy(text + textlen, cont.buf + cont.cons, len);
1407 textlen += len;
1408 cont.cons = cont.len;
1409 }
1410
1411 if (cont.flushed) {
1412 text[textlen++] = '\n';
1413 /* got everything, release buffer */
1414 cont.len = 0;
1415 }
1416 return textlen;
1417}
1418
1262asmlinkage int vprintk_emit(int facility, int level, 1419asmlinkage int vprintk_emit(int facility, int level,
1263 const char *dict, size_t dictlen, 1420 const char *dict, size_t dictlen,
1264 const char *fmt, va_list args) 1421 const char *fmt, va_list args)
1265{ 1422{
1266 static int recursion_bug; 1423 static int recursion_bug;
1267 static char cont_buf[LOG_LINE_MAX];
1268 static size_t cont_len;
1269 static int cont_level;
1270 static struct task_struct *cont_task;
1271 static char textbuf[LOG_LINE_MAX]; 1424 static char textbuf[LOG_LINE_MAX];
1272 char *text = textbuf; 1425 char *text = textbuf;
1273 size_t text_len; 1426 size_t text_len;
1427 enum log_flags lflags = 0;
1274 unsigned long flags; 1428 unsigned long flags;
1275 int this_cpu; 1429 int this_cpu;
1276 bool newline = false;
1277 bool prefix = false;
1278 int printed_len = 0; 1430 int printed_len = 0;
1279 1431
1280 boot_delay_msec(); 1432 boot_delay_msec();
@@ -1313,7 +1465,8 @@ asmlinkage int vprintk_emit(int facility, int level,
1313 recursion_bug = 0; 1465 recursion_bug = 0;
1314 printed_len += strlen(recursion_msg); 1466 printed_len += strlen(recursion_msg);
1315 /* emit KERN_CRIT message */ 1467 /* emit KERN_CRIT message */
1316 log_store(0, 2, NULL, 0, recursion_msg, printed_len); 1468 log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
1469 NULL, 0, recursion_msg, printed_len);
1317 } 1470 }
1318 1471
1319 /* 1472 /*
@@ -1325,7 +1478,7 @@ asmlinkage int vprintk_emit(int facility, int level,
1325 /* mark and strip a trailing newline */ 1478 /* mark and strip a trailing newline */
1326 if (text_len && text[text_len-1] == '\n') { 1479 if (text_len && text[text_len-1] == '\n') {
1327 text_len--; 1480 text_len--;
1328 newline = true; 1481 lflags |= LOG_NEWLINE;
1329 } 1482 }
1330 1483
1331 /* strip syslog prefix and extract log level or control flags */ 1484 /* strip syslog prefix and extract log level or control flags */
@@ -1335,7 +1488,7 @@ asmlinkage int vprintk_emit(int facility, int level,
1335 if (level == -1) 1488 if (level == -1)
1336 level = text[1] - '0'; 1489 level = text[1] - '0';
1337 case 'd': /* KERN_DEFAULT */ 1490 case 'd': /* KERN_DEFAULT */
1338 prefix = true; 1491 lflags |= LOG_PREFIX;
1339 case 'c': /* KERN_CONT */ 1492 case 'c': /* KERN_CONT */
1340 text += 3; 1493 text += 3;
1341 text_len -= 3; 1494 text_len -= 3;
@@ -1345,61 +1498,41 @@ asmlinkage int vprintk_emit(int facility, int level,
1345 if (level == -1) 1498 if (level == -1)
1346 level = default_message_loglevel; 1499 level = default_message_loglevel;
1347 1500
1348 if (dict) { 1501 if (dict)
1349 prefix = true; 1502 lflags |= LOG_PREFIX|LOG_NEWLINE;
1350 newline = true;
1351 }
1352
1353 if (!newline) {
1354 if (cont_len && (prefix || cont_task != current)) {
1355 /*
1356 * Flush earlier buffer, which is either from a
1357 * different thread, or when we got a new prefix.
1358 */
1359 log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
1360 cont_len = 0;
1361 }
1362 1503
1363 if (!cont_len) { 1504 if (!(lflags & LOG_NEWLINE)) {
1364 cont_level = level; 1505 /*
1365 cont_task = current; 1506 * Flush the conflicting buffer. An earlier newline was missing,
1366 } 1507 * or another task also prints continuation lines.
1508 */
1509 if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
1510 cont_flush();
1367 1511
1368 /* buffer or append to earlier buffer from the same thread */ 1512 /* buffer line if possible, otherwise store it right away */
1369 if (cont_len + text_len > sizeof(cont_buf)) 1513 if (!cont_add(facility, level, text, text_len))
1370 text_len = sizeof(cont_buf) - cont_len; 1514 log_store(facility, level, lflags | LOG_CONT, 0,
1371 memcpy(cont_buf + cont_len, text, text_len); 1515 dict, dictlen, text, text_len);
1372 cont_len += text_len;
1373 } else { 1516 } else {
1374 if (cont_len && cont_task == current) { 1517 bool stored = false;
1375 if (prefix) {
1376 /*
1377 * New prefix from the same thread; flush. We
1378 * either got no earlier newline, or we race
1379 * with an interrupt.
1380 */
1381 log_store(facility, cont_level,
1382 NULL, 0, cont_buf, cont_len);
1383 cont_len = 0;
1384 }
1385 1518
1386 /* append to the earlier buffer and flush */ 1519 /*
1387 if (cont_len + text_len > sizeof(cont_buf)) 1520 * If an earlier newline was missing and it was the same task,
1388 text_len = sizeof(cont_buf) - cont_len; 1521 * either merge it with the current buffer and flush, or if
1389 memcpy(cont_buf + cont_len, text, text_len); 1522 * there was a race with interrupts (prefix == true) then just
1390 cont_len += text_len; 1523 * flush it out and store this line separately.
1391 log_store(facility, cont_level, 1524 */
1392 NULL, 0, cont_buf, cont_len); 1525 if (cont.len && cont.owner == current) {
1393 cont_len = 0; 1526 if (!(lflags & LOG_PREFIX))
1394 cont_task = NULL; 1527 stored = cont_add(facility, level, text, text_len);
1395 printed_len = cont_len; 1528 cont_flush();
1396 } else {
1397 /* ordinary single and terminated line */
1398 log_store(facility, level,
1399 dict, dictlen, text, text_len);
1400 printed_len = text_len;
1401 } 1529 }
1530
1531 if (!stored)
1532 log_store(facility, level, lflags, 0,
1533 dict, dictlen, text, text_len);
1402 } 1534 }
1535 printed_len += text_len;
1403 1536
1404 /* 1537 /*
1405 * Try to acquire and then immediately release the console semaphore. 1538 * Try to acquire and then immediately release the console semaphore.
@@ -1486,11 +1619,18 @@ EXPORT_SYMBOL(printk);
1486#else 1619#else
1487 1620
1488#define LOG_LINE_MAX 0 1621#define LOG_LINE_MAX 0
1622static struct cont {
1623 size_t len;
1624 size_t cons;
1625 u8 level;
1626 bool flushed:1;
1627} cont;
1489static struct log *log_from_idx(u32 idx) { return NULL; } 1628static struct log *log_from_idx(u32 idx) { return NULL; }
1490static u32 log_next(u32 idx) { return 0; } 1629static u32 log_next(u32 idx) { return 0; }
1491static void call_console_drivers(int level, const char *text, size_t len) {} 1630static void call_console_drivers(int level, const char *text, size_t len) {}
1492static size_t msg_print_text(const struct log *msg, bool syslog, 1631static size_t msg_print_text(const struct log *msg, enum log_flags prev,
1493 char *buf, size_t size) { return 0; } 1632 bool syslog, char *buf, size_t size) { return 0; }
1633static size_t cont_print_text(char *text, size_t size) { return 0; }
1494 1634
1495#endif /* CONFIG_PRINTK */ 1635#endif /* CONFIG_PRINTK */
1496 1636
@@ -1765,6 +1905,7 @@ void wake_up_klogd(void)
1765/* the next printk record to write to the console */ 1905/* the next printk record to write to the console */
1766static u64 console_seq; 1906static u64 console_seq;
1767static u32 console_idx; 1907static u32 console_idx;
1908static enum log_flags console_prev;
1768 1909
1769/** 1910/**
1770 * console_unlock - unlock the console system 1911 * console_unlock - unlock the console system
@@ -1782,6 +1923,7 @@ static u32 console_idx;
1782 */ 1923 */
1783void console_unlock(void) 1924void console_unlock(void)
1784{ 1925{
1926 static char text[LOG_LINE_MAX];
1785 static u64 seen_seq; 1927 static u64 seen_seq;
1786 unsigned long flags; 1928 unsigned long flags;
1787 bool wake_klogd = false; 1929 bool wake_klogd = false;
@@ -1794,10 +1936,23 @@ void console_unlock(void)
1794 1936
1795 console_may_schedule = 0; 1937 console_may_schedule = 0;
1796 1938
1939 /* flush buffered message fragment immediately to console */
1940 raw_spin_lock_irqsave(&logbuf_lock, flags);
1941 if (cont.len && (cont.cons < cont.len || cont.flushed)) {
1942 size_t len;
1943
1944 len = cont_print_text(text, sizeof(text));
1945 raw_spin_unlock(&logbuf_lock);
1946 stop_critical_timings();
1947 call_console_drivers(cont.level, text, len);
1948 start_critical_timings();
1949 local_irq_restore(flags);
1950 } else
1951 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
1952
1797again: 1953again:
1798 for (;;) { 1954 for (;;) {
1799 struct log *msg; 1955 struct log *msg;
1800 static char text[LOG_LINE_MAX];
1801 size_t len; 1956 size_t len;
1802 int level; 1957 int level;
1803 1958
@@ -1811,18 +1966,35 @@ again:
1811 /* messages are gone, move to first one */ 1966 /* messages are gone, move to first one */
1812 console_seq = log_first_seq; 1967 console_seq = log_first_seq;
1813 console_idx = log_first_idx; 1968 console_idx = log_first_idx;
1969 console_prev = 0;
1814 } 1970 }
1815 1971skip:
1816 if (console_seq == log_next_seq) 1972 if (console_seq == log_next_seq)
1817 break; 1973 break;
1818 1974
1819 msg = log_from_idx(console_idx); 1975 msg = log_from_idx(console_idx);
1820 level = msg->level & 7; 1976 if (msg->flags & LOG_NOCONS) {
1821 1977 /*
1822 len = msg_print_text(msg, false, text, sizeof(text)); 1978 * Skip record we have buffered and already printed
1979 * directly to the console when we received it.
1980 */
1981 console_idx = log_next(console_idx);
1982 console_seq++;
1983 /*
1984 * We will get here again when we register a new
1985 * CON_PRINTBUFFER console. Clear the flag so we
1986 * will properly dump everything later.
1987 */
1988 msg->flags &= ~LOG_NOCONS;
1989 goto skip;
1990 }
1823 1991
1992 level = msg->level;
1993 len = msg_print_text(msg, console_prev, false,
1994 text, sizeof(text));
1824 console_idx = log_next(console_idx); 1995 console_idx = log_next(console_idx);
1825 console_seq++; 1996 console_seq++;
1997 console_prev = msg->flags;
1826 raw_spin_unlock(&logbuf_lock); 1998 raw_spin_unlock(&logbuf_lock);
1827 1999
1828 stop_critical_timings(); /* don't trace print latency */ 2000 stop_critical_timings(); /* don't trace print latency */
@@ -2085,6 +2257,7 @@ void register_console(struct console *newcon)
2085 raw_spin_lock_irqsave(&logbuf_lock, flags); 2257 raw_spin_lock_irqsave(&logbuf_lock, flags);
2086 console_seq = syslog_seq; 2258 console_seq = syslog_seq;
2087 console_idx = syslog_idx; 2259 console_idx = syslog_idx;
2260 console_prev = syslog_prev;
2088 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 2261 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2089 /* 2262 /*
2090 * We're about to replay the log buffer. Only do this to the 2263 * We're about to replay the log buffer. Only do this to the
@@ -2300,48 +2473,256 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
2300 * kmsg_dump - dump kernel log to kernel message dumpers. 2473 * kmsg_dump - dump kernel log to kernel message dumpers.
2301 * @reason: the reason (oops, panic etc) for dumping 2474 * @reason: the reason (oops, panic etc) for dumping
2302 * 2475 *
2303 * Iterate through each of the dump devices and call the oops/panic 2476 * Call each of the registered dumper's dump() callback, which can
2304 * callbacks with the log buffer. 2477 * retrieve the kmsg records with kmsg_dump_get_line() or
2478 * kmsg_dump_get_buffer().
2305 */ 2479 */
2306void kmsg_dump(enum kmsg_dump_reason reason) 2480void kmsg_dump(enum kmsg_dump_reason reason)
2307{ 2481{
2308 u64 idx;
2309 struct kmsg_dumper *dumper; 2482 struct kmsg_dumper *dumper;
2310 const char *s1, *s2;
2311 unsigned long l1, l2;
2312 unsigned long flags; 2483 unsigned long flags;
2313 2484
2314 if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) 2485 if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
2315 return; 2486 return;
2316 2487
2317 /* Theoretically, the log could move on after we do this, but 2488 rcu_read_lock();
2318 there's not a lot we can do about that. The new messages 2489 list_for_each_entry_rcu(dumper, &dump_list, list) {
2319 will overwrite the start of what we dump. */ 2490 if (dumper->max_reason && reason > dumper->max_reason)
2491 continue;
2492
2493 /* initialize iterator with data about the stored records */
2494 dumper->active = true;
2495
2496 raw_spin_lock_irqsave(&logbuf_lock, flags);
2497 dumper->cur_seq = clear_seq;
2498 dumper->cur_idx = clear_idx;
2499 dumper->next_seq = log_next_seq;
2500 dumper->next_idx = log_next_idx;
2501 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2502
2503 /* invoke dumper which will iterate over records */
2504 dumper->dump(dumper, reason);
2505
2506 /* reset iterator */
2507 dumper->active = false;
2508 }
2509 rcu_read_unlock();
2510}
2511
2512/**
2513 * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
2514 * @dumper: registered kmsg dumper
2515 * @syslog: include the "<4>" prefixes
2516 * @line: buffer to copy the line to
2517 * @size: maximum size of the buffer
2518 * @len: length of line placed into buffer
2519 *
2520 * Start at the beginning of the kmsg buffer, with the oldest kmsg
2521 * record, and copy one record into the provided buffer.
2522 *
2523 * Consecutive calls will return the next available record moving
2524 * towards the end of the buffer with the youngest messages.
2525 *
2526 * A return value of FALSE indicates that there are no more records to
2527 * read.
2528 *
2529 * The function is similar to kmsg_dump_get_line(), but grabs no locks.
2530 */
2531bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
2532 char *line, size_t size, size_t *len)
2533{
2534 struct log *msg;
2535 size_t l = 0;
2536 bool ret = false;
2537
2538 if (!dumper->active)
2539 goto out;
2540
2541 if (dumper->cur_seq < log_first_seq) {
2542 /* messages are gone, move to first available one */
2543 dumper->cur_seq = log_first_seq;
2544 dumper->cur_idx = log_first_idx;
2545 }
2546
2547 /* last entry */
2548 if (dumper->cur_seq >= log_next_seq)
2549 goto out;
2550
2551 msg = log_from_idx(dumper->cur_idx);
2552 l = msg_print_text(msg, 0, syslog, line, size);
2553
2554 dumper->cur_idx = log_next(dumper->cur_idx);
2555 dumper->cur_seq++;
2556 ret = true;
2557out:
2558 if (len)
2559 *len = l;
2560 return ret;
2561}
2562
2563/**
2564 * kmsg_dump_get_line - retrieve one kmsg log line
2565 * @dumper: registered kmsg dumper
2566 * @syslog: include the "<4>" prefixes
2567 * @line: buffer to copy the line to
2568 * @size: maximum size of the buffer
2569 * @len: length of line placed into buffer
2570 *
2571 * Start at the beginning of the kmsg buffer, with the oldest kmsg
2572 * record, and copy one record into the provided buffer.
2573 *
2574 * Consecutive calls will return the next available record moving
2575 * towards the end of the buffer with the youngest messages.
2576 *
2577 * A return value of FALSE indicates that there are no more records to
2578 * read.
2579 */
2580bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
2581 char *line, size_t size, size_t *len)
2582{
2583 unsigned long flags;
2584 bool ret;
2585
2586 raw_spin_lock_irqsave(&logbuf_lock, flags);
2587 ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
2588 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2589
2590 return ret;
2591}
2592EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
2593
2594/**
2595 * kmsg_dump_get_buffer - copy kmsg log lines
2596 * @dumper: registered kmsg dumper
2597 * @syslog: include the "<4>" prefixes
2598 * @buf: buffer to copy the line to
2599 * @size: maximum size of the buffer
2600 * @len: length of line placed into buffer
2601 *
2602 * Start at the end of the kmsg buffer and fill the provided buffer
 2603 * with as many of the *youngest* kmsg records as fit into it.
2604 * If the buffer is large enough, all available kmsg records will be
2605 * copied with a single call.
2606 *
2607 * Consecutive calls will fill the buffer with the next block of
2608 * available older records, not including the earlier retrieved ones.
2609 *
2610 * A return value of FALSE indicates that there are no more records to
2611 * read.
2612 */
2613bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2614 char *buf, size_t size, size_t *len)
2615{
2616 unsigned long flags;
2617 u64 seq;
2618 u32 idx;
2619 u64 next_seq;
2620 u32 next_idx;
2621 enum log_flags prev;
2622 size_t l = 0;
2623 bool ret = false;
2624
2625 if (!dumper->active)
2626 goto out;
2320 2627
2321 raw_spin_lock_irqsave(&logbuf_lock, flags); 2628 raw_spin_lock_irqsave(&logbuf_lock, flags);
2322 if (syslog_seq < log_first_seq) 2629 if (dumper->cur_seq < log_first_seq) {
2323 idx = syslog_idx; 2630 /* messages are gone, move to first available one */
2324 else 2631 dumper->cur_seq = log_first_seq;
2325 idx = log_first_idx; 2632 dumper->cur_idx = log_first_idx;
2633 }
2634
2635 /* last entry */
2636 if (dumper->cur_seq >= dumper->next_seq) {
2637 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2638 goto out;
2639 }
2326 2640
2327 if (idx > log_next_idx) { 2641 /* calculate length of entire buffer */
2328 s1 = log_buf; 2642 seq = dumper->cur_seq;
2329 l1 = log_next_idx; 2643 idx = dumper->cur_idx;
2644 prev = 0;
2645 while (seq < dumper->next_seq) {
2646 struct log *msg = log_from_idx(idx);
2647
2648 l += msg_print_text(msg, prev, true, NULL, 0);
2649 idx = log_next(idx);
2650 seq++;
2651 prev = msg->flags;
2652 }
2330 2653
2331 s2 = log_buf + idx; 2654 /* move first record forward until length fits into the buffer */
2332 l2 = log_buf_len - idx; 2655 seq = dumper->cur_seq;
2333 } else { 2656 idx = dumper->cur_idx;
2334 s1 = ""; 2657 prev = 0;
2335 l1 = 0; 2658 while (l > size && seq < dumper->next_seq) {
2659 struct log *msg = log_from_idx(idx);
2660
2661 l -= msg_print_text(msg, prev, true, NULL, 0);
2662 idx = log_next(idx);
2663 seq++;
2664 prev = msg->flags;
2665 }
2666
 2667 /* last message in next iteration */
2668 next_seq = seq;
2669 next_idx = idx;
2670
2671 l = 0;
2672 prev = 0;
2673 while (seq < dumper->next_seq) {
2674 struct log *msg = log_from_idx(idx);
2336 2675
2337 s2 = log_buf + idx; 2676 l += msg_print_text(msg, prev, syslog, buf + l, size - l);
2338 l2 = log_next_idx - idx; 2677 idx = log_next(idx);
2678 seq++;
2679 prev = msg->flags;
2339 } 2680 }
2681
2682 dumper->next_seq = next_seq;
2683 dumper->next_idx = next_idx;
2684 ret = true;
2340 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 2685 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2686out:
2687 if (len)
2688 *len = l;
2689 return ret;
2690}
2691EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
2341 2692
2342 rcu_read_lock(); 2693/**
2343 list_for_each_entry_rcu(dumper, &dump_list, list) 2694 * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
2344 dumper->dump(dumper, reason, s1, l1, s2, l2); 2695 * @dumper: registered kmsg dumper
2345 rcu_read_unlock(); 2696 *
2697 * Reset the dumper's iterator so that kmsg_dump_get_line() and
2698 * kmsg_dump_get_buffer() can be called again and used multiple
2699 * times within the same dumper.dump() callback.
2700 *
2701 * The function is similar to kmsg_dump_rewind(), but grabs no locks.
2702 */
2703void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
2704{
2705 dumper->cur_seq = clear_seq;
2706 dumper->cur_idx = clear_idx;
2707 dumper->next_seq = log_next_seq;
2708 dumper->next_idx = log_next_idx;
2709}
2710
2711/**
 2712 * kmsg_dump_rewind - reset the iterator
2713 * @dumper: registered kmsg dumper
2714 *
2715 * Reset the dumper's iterator so that kmsg_dump_get_line() and
2716 * kmsg_dump_get_buffer() can be called again and used multiple
2717 * times within the same dumper.dump() callback.
2718 */
2719void kmsg_dump_rewind(struct kmsg_dumper *dumper)
2720{
2721 unsigned long flags;
2722
2723 raw_spin_lock_irqsave(&logbuf_lock, flags);
2724 kmsg_dump_rewind_nolock(dumper);
2725 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2346} 2726}
2727EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
2347#endif 2728#endif
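The kmsg_dump_get_line()/kmsg_dump_get_buffer()/kmsg_dump_rewind() helpers introduced above replace the old scheme of handing raw s1/l1/s2/l2 buffer pointers straight to dumper->dump(). A minimal sketch of how a dumper might walk the records with the new iterator API follows; it is not part of this patch, and the dumper name, the 1 KiB line buffer and the byte counting are invented for illustration:

	/* Sketch only: a kmsg dumper using the iterator API added above. */
	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/kmsg_dump.h>

	static void example_dump(struct kmsg_dumper *dumper,
				 enum kmsg_dump_reason reason)
	{
		static char line[1024];		/* illustrative line buffer */
		size_t len, total = 0;

		/* Oldest to newest; returns false once no records remain. */
		while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len))
			total += len;	/* a real dumper would persist 'line' */

		/* Rewind so the records could be walked again if needed. */
		kmsg_dump_rewind(dumper);

		pr_info("example dumper: %zu bytes for reason %d\n", total, reason);
	}

	static struct kmsg_dumper example_dumper = {
		.dump		= example_dump,
		.max_reason	= KMSG_DUMP_OOPS,	/* only oops and panic */
	};

	static int __init example_dumper_init(void)
	{
		return kmsg_dump_register(&example_dumper);
	}

	static void __exit example_dumper_exit(void)
	{
		kmsg_dump_unregister(&example_dumper);
	}

	module_init(example_dumper_init);
	module_exit(example_dumper_exit);
	MODULE_LICENSE("GPL");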
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 3b0f1337f75b..4b97bba7396e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -201,6 +201,7 @@ void rcu_note_context_switch(int cpu)
201{ 201{
202 trace_rcu_utilization("Start context switch"); 202 trace_rcu_utilization("Start context switch");
203 rcu_sched_qs(cpu); 203 rcu_sched_qs(cpu);
204 rcu_preempt_note_context_switch(cpu);
204 trace_rcu_utilization("End context switch"); 205 trace_rcu_utilization("End context switch");
205} 206}
206EXPORT_SYMBOL_GPL(rcu_note_context_switch); 207EXPORT_SYMBOL_GPL(rcu_note_context_switch);
@@ -1530,7 +1531,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1530{ 1531{
1531 unsigned long flags; 1532 unsigned long flags;
1532 struct rcu_head *next, *list, **tail; 1533 struct rcu_head *next, *list, **tail;
1533 int bl, count, count_lazy; 1534 int bl, count, count_lazy, i;
1534 1535
1535 /* If no callbacks are ready, just return.*/ 1536 /* If no callbacks are ready, just return.*/
1536 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1537 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1553,9 +1554,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1553 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1554 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1554 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1555 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1555 tail = rdp->nxttail[RCU_DONE_TAIL]; 1556 tail = rdp->nxttail[RCU_DONE_TAIL];
1556 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) 1557 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
1557 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) 1558 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1558 rdp->nxttail[count] = &rdp->nxtlist; 1559 rdp->nxttail[i] = &rdp->nxtlist;
1559 local_irq_restore(flags); 1560 local_irq_restore(flags);
1560 1561
1561 /* Invoke callbacks. */ 1562 /* Invoke callbacks. */
@@ -1583,9 +1584,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1583 if (list != NULL) { 1584 if (list != NULL) {
1584 *tail = rdp->nxtlist; 1585 *tail = rdp->nxtlist;
1585 rdp->nxtlist = list; 1586 rdp->nxtlist = list;
1586 for (count = 0; count < RCU_NEXT_SIZE; count++) 1587 for (i = 0; i < RCU_NEXT_SIZE; i++)
1587 if (&rdp->nxtlist == rdp->nxttail[count]) 1588 if (&rdp->nxtlist == rdp->nxttail[i])
1588 rdp->nxttail[count] = tail; 1589 rdp->nxttail[i] = tail;
1589 else 1590 else
1590 break; 1591 break;
1591 } 1592 }
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index ea056495783e..19b61ac1079f 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -444,6 +444,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
444/* Forward declarations for rcutree_plugin.h */ 444/* Forward declarations for rcutree_plugin.h */
445static void rcu_bootup_announce(void); 445static void rcu_bootup_announce(void);
446long rcu_batches_completed(void); 446long rcu_batches_completed(void);
447static void rcu_preempt_note_context_switch(int cpu);
447static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 448static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
448#ifdef CONFIG_HOTPLUG_CPU 449#ifdef CONFIG_HOTPLUG_CPU
449static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 450static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5271a020887e..3e4899459f3d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
153 * 153 *
154 * Caller must disable preemption. 154 * Caller must disable preemption.
155 */ 155 */
156void rcu_preempt_note_context_switch(void) 156static void rcu_preempt_note_context_switch(int cpu)
157{ 157{
158 struct task_struct *t = current; 158 struct task_struct *t = current;
159 unsigned long flags; 159 unsigned long flags;
@@ -164,7 +164,7 @@ void rcu_preempt_note_context_switch(void)
164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 164 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
165 165
166 /* Possibly blocking in an RCU read-side critical section. */ 166 /* Possibly blocking in an RCU read-side critical section. */
167 rdp = __this_cpu_ptr(rcu_preempt_state.rda); 167 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
168 rnp = rdp->mynode; 168 rnp = rdp->mynode;
169 raw_spin_lock_irqsave(&rnp->lock, flags); 169 raw_spin_lock_irqsave(&rnp->lock, flags);
170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 170 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ void rcu_preempt_note_context_switch(void)
228 * means that we continue to block the current grace period. 228 * means that we continue to block the current grace period.
229 */ 229 */
230 local_irq_save(flags); 230 local_irq_save(flags);
231 rcu_preempt_qs(smp_processor_id()); 231 rcu_preempt_qs(cpu);
232 local_irq_restore(flags); 232 local_irq_restore(flags);
233} 233}
234 234
@@ -1002,6 +1002,14 @@ void rcu_force_quiescent_state(void)
1002EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 1002EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
1003 1003
1004/* 1004/*
1005 * Because preemptible RCU does not exist, we never have to check for
1006 * CPUs being in quiescent states.
1007 */
1008static void rcu_preempt_note_context_switch(int cpu)
1009{
1010}
1011
1012/*
1005 * Because preemptible RCU does not exist, there are never any preempted 1013 * Because preemptible RCU does not exist, there are never any preempted
1006 * RCU readers. 1014 * RCU readers.
1007 */ 1015 */
diff --git a/kernel/relay.c b/kernel/relay.c
index ab56a1764d4d..e8cd2027abbd 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
1235 struct splice_pipe_desc spd = { 1235 struct splice_pipe_desc spd = {
1236 .pages = pages, 1236 .pages = pages,
1237 .nr_pages = 0, 1237 .nr_pages = 0,
1238 .nr_pages_max = PIPE_DEF_BUFFERS,
1238 .partial = partial, 1239 .partial = partial,
1239 .flags = flags, 1240 .flags = flags,
1240 .ops = &relay_pipe_buf_ops, 1241 .ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
1302 ret += padding; 1303 ret += padding;
1303 1304
1304out: 1305out:
1305 splice_shrink_spd(pipe, &spd); 1306 splice_shrink_spd(&spd);
1306 return ret; 1307 return ret;
1307} 1308}
1308 1309
1309static ssize_t relay_file_splice_read(struct file *in, 1310static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d5594a4268d4..468bdd44c1ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
2081#endif 2081#endif
2082 2082
2083 /* Here we just switch the register state and the stack. */ 2083 /* Here we just switch the register state and the stack. */
2084 rcu_switch_from(prev);
2085 switch_to(prev, next, prev); 2084 switch_to(prev, next, prev);
2086 2085
2087 barrier(); 2086 barrier();
@@ -2161,11 +2160,73 @@ unsigned long this_cpu_load(void)
2161} 2160}
2162 2161
2163 2162
2163/*
2164 * Global load-average calculations
2165 *
2166 * We take a distributed and async approach to calculating the global load-avg
2167 * in order to minimize overhead.
2168 *
2169 * The global load average is an exponentially decaying average of nr_running +
2170 * nr_uninterruptible.
2171 *
2172 * Once every LOAD_FREQ:
2173 *
2174 * nr_active = 0;
2175 * for_each_possible_cpu(cpu)
2176 * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
2177 *
2178 * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
2179 *
 2180 * Due to a number of reasons the above turns into the mess below:
2181 *
2182 * - for_each_possible_cpu() is prohibitively expensive on machines with
2183 * serious number of cpus, therefore we need to take a distributed approach
2184 * to calculating nr_active.
2185 *
2186 * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
2187 * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
2188 *
 2189 * So assuming nr_active := 0 when we start out -- true by definition, we
2190 * can simply take per-cpu deltas and fold those into a global accumulate
2191 * to obtain the same result. See calc_load_fold_active().
2192 *
2193 * Furthermore, in order to avoid synchronizing all per-cpu delta folding
2194 * across the machine, we assume 10 ticks is sufficient time for every
2195 * cpu to have completed this task.
2196 *
2197 * This places an upper-bound on the IRQ-off latency of the machine. Then
 2198 * again, being late doesn't lose the delta, just wrecks the sample.
2199 *
2200 * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
2201 * this would add another cross-cpu cacheline miss and atomic operation
2202 * to the wakeup path. Instead we increment on whatever cpu the task ran
2203 * when it went into uninterruptible state and decrement on whatever cpu
2204 * did the wakeup. This means that only the sum of nr_uninterruptible over
2205 * all cpus yields the correct result.
2206 *
 2207 * This covers the NO_HZ=n code; for extra headaches, see the comment below.
2208 */
2209
2164/* Variables and functions for calc_load */ 2210/* Variables and functions for calc_load */
2165static atomic_long_t calc_load_tasks; 2211static atomic_long_t calc_load_tasks;
2166static unsigned long calc_load_update; 2212static unsigned long calc_load_update;
2167unsigned long avenrun[3]; 2213unsigned long avenrun[3];
2168EXPORT_SYMBOL(avenrun); 2214EXPORT_SYMBOL(avenrun); /* should be removed */
2215
2216/**
2217 * get_avenrun - get the load average array
2218 * @loads: pointer to dest load array
2219 * @offset: offset to add
2220 * @shift: shift count to shift the result left
2221 *
2222 * These values are estimates at best, so no need for locking.
2223 */
2224void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
2225{
2226 loads[0] = (avenrun[0] + offset) << shift;
2227 loads[1] = (avenrun[1] + offset) << shift;
2228 loads[2] = (avenrun[2] + offset) << shift;
2229}
2169 2230
2170static long calc_load_fold_active(struct rq *this_rq) 2231static long calc_load_fold_active(struct rq *this_rq)
2171{ 2232{
@@ -2182,6 +2243,9 @@ static long calc_load_fold_active(struct rq *this_rq)
2182 return delta; 2243 return delta;
2183} 2244}
2184 2245
2246/*
2247 * a1 = a0 * e + a * (1 - e)
2248 */
2185static unsigned long 2249static unsigned long
2186calc_load(unsigned long load, unsigned long exp, unsigned long active) 2250calc_load(unsigned long load, unsigned long exp, unsigned long active)
2187{ 2251{
@@ -2193,30 +2257,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
2193 2257
2194#ifdef CONFIG_NO_HZ 2258#ifdef CONFIG_NO_HZ
2195/* 2259/*
2196 * For NO_HZ we delay the active fold to the next LOAD_FREQ update. 2260 * Handle NO_HZ for the global load-average.
2261 *
2262 * Since the above described distributed algorithm to compute the global
2263 * load-average relies on per-cpu sampling from the tick, it is affected by
2264 * NO_HZ.
2265 *
2266 * The basic idea is to fold the nr_active delta into a global idle-delta upon
2267 * entering NO_HZ state such that we can include this as an 'extra' cpu delta
2268 * when we read the global state.
2269 *
2270 * Obviously reality has to ruin such a delightfully simple scheme:
2271 *
2272 * - When we go NO_HZ idle during the window, we can negate our sample
2273 * contribution, causing under-accounting.
2274 *
2275 * We avoid this by keeping two idle-delta counters and flipping them
2276 * when the window starts, thus separating old and new NO_HZ load.
2277 *
2278 * The only trick is the slight shift in index flip for read vs write.
2279 *
2280 * 0s 5s 10s 15s
2281 * +10 +10 +10 +10
2282 * |-|-----------|-|-----------|-|-----------|-|
2283 * r:0 0 1 1 0 0 1 1 0
2284 * w:0 1 1 0 0 1 1 0 0
2285 *
2286 * This ensures we'll fold the old idle contribution in this window while
 2287 * accumulating the new one.
2288 *
2289 * - When we wake up from NO_HZ idle during the window, we push up our
2290 * contribution, since we effectively move our sample point to a known
2291 * busy state.
2292 *
2293 * This is solved by pushing the window forward, and thus skipping the
2294 * sample, for this cpu (effectively using the idle-delta for this cpu which
2295 * was in effect at the time the window opened). This also solves the issue
2296 * of having to deal with a cpu having been in NOHZ idle for multiple
2297 * LOAD_FREQ intervals.
2197 * 2298 *
2198 * When making the ILB scale, we should try to pull this in as well. 2299 * When making the ILB scale, we should try to pull this in as well.
2199 */ 2300 */
2200static atomic_long_t calc_load_tasks_idle; 2301static atomic_long_t calc_load_idle[2];
2302static int calc_load_idx;
2201 2303
2202void calc_load_account_idle(struct rq *this_rq) 2304static inline int calc_load_write_idx(void)
2203{ 2305{
2306 int idx = calc_load_idx;
2307
2308 /*
2309 * See calc_global_nohz(), if we observe the new index, we also
2310 * need to observe the new update time.
2311 */
2312 smp_rmb();
2313
2314 /*
2315 * If the folding window started, make sure we start writing in the
2316 * next idle-delta.
2317 */
2318 if (!time_before(jiffies, calc_load_update))
2319 idx++;
2320
2321 return idx & 1;
2322}
2323
2324static inline int calc_load_read_idx(void)
2325{
2326 return calc_load_idx & 1;
2327}
2328
2329void calc_load_enter_idle(void)
2330{
2331 struct rq *this_rq = this_rq();
2204 long delta; 2332 long delta;
2205 2333
2334 /*
2335 * We're going into NOHZ mode, if there's any pending delta, fold it
2336 * into the pending idle delta.
2337 */
2206 delta = calc_load_fold_active(this_rq); 2338 delta = calc_load_fold_active(this_rq);
2207 if (delta) 2339 if (delta) {
2208 atomic_long_add(delta, &calc_load_tasks_idle); 2340 int idx = calc_load_write_idx();
2341 atomic_long_add(delta, &calc_load_idle[idx]);
2342 }
2209} 2343}
2210 2344
2211static long calc_load_fold_idle(void) 2345void calc_load_exit_idle(void)
2212{ 2346{
2213 long delta = 0; 2347 struct rq *this_rq = this_rq();
2348
2349 /*
2350 * If we're still before the sample window, we're done.
2351 */
2352 if (time_before(jiffies, this_rq->calc_load_update))
2353 return;
2214 2354
2215 /* 2355 /*
2216 * Its got a race, we don't care... 2356 * We woke inside or after the sample window, this means we're already
2357 * accounted through the nohz accounting, so skip the entire deal and
2358 * sync up for the next window.
2217 */ 2359 */
2218 if (atomic_long_read(&calc_load_tasks_idle)) 2360 this_rq->calc_load_update = calc_load_update;
2219 delta = atomic_long_xchg(&calc_load_tasks_idle, 0); 2361 if (time_before(jiffies, this_rq->calc_load_update + 10))
2362 this_rq->calc_load_update += LOAD_FREQ;
2363}
2364
2365static long calc_load_fold_idle(void)
2366{
2367 int idx = calc_load_read_idx();
2368 long delta = 0;
2369
2370 if (atomic_long_read(&calc_load_idle[idx]))
2371 delta = atomic_long_xchg(&calc_load_idle[idx], 0);
2220 2372
2221 return delta; 2373 return delta;
2222} 2374}
@@ -2302,66 +2454,39 @@ static void calc_global_nohz(void)
2302{ 2454{
2303 long delta, active, n; 2455 long delta, active, n;
2304 2456
2305 /* 2457 if (!time_before(jiffies, calc_load_update + 10)) {
2306 * If we crossed a calc_load_update boundary, make sure to fold 2458 /*
2307 * any pending idle changes, the respective CPUs might have 2459 * Catch-up, fold however many we are behind still
2308 * missed the tick driven calc_load_account_active() update 2460 */
2309 * due to NO_HZ. 2461 delta = jiffies - calc_load_update - 10;
2310 */ 2462 n = 1 + (delta / LOAD_FREQ);
2311 delta = calc_load_fold_idle();
2312 if (delta)
2313 atomic_long_add(delta, &calc_load_tasks);
2314
2315 /*
2316 * It could be the one fold was all it took, we done!
2317 */
2318 if (time_before(jiffies, calc_load_update + 10))
2319 return;
2320
2321 /*
2322 * Catch-up, fold however many we are behind still
2323 */
2324 delta = jiffies - calc_load_update - 10;
2325 n = 1 + (delta / LOAD_FREQ);
2326 2463
2327 active = atomic_long_read(&calc_load_tasks); 2464 active = atomic_long_read(&calc_load_tasks);
2328 active = active > 0 ? active * FIXED_1 : 0; 2465 active = active > 0 ? active * FIXED_1 : 0;
2329 2466
2330 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); 2467 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
2331 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); 2468 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
2332 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); 2469 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
2333 2470
2334 calc_load_update += n * LOAD_FREQ; 2471 calc_load_update += n * LOAD_FREQ;
2335} 2472 }
2336#else
2337void calc_load_account_idle(struct rq *this_rq)
2338{
2339}
2340 2473
2341static inline long calc_load_fold_idle(void) 2474 /*
2342{ 2475 * Flip the idle index...
2343 return 0; 2476 *
2477 * Make sure we first write the new time then flip the index, so that
2478 * calc_load_write_idx() will see the new time when it reads the new
2479 * index, this avoids a double flip messing things up.
2480 */
2481 smp_wmb();
2482 calc_load_idx++;
2344} 2483}
2484#else /* !CONFIG_NO_HZ */
2345 2485
2346static void calc_global_nohz(void) 2486static inline long calc_load_fold_idle(void) { return 0; }
2347{ 2487static inline void calc_global_nohz(void) { }
2348}
2349#endif
2350 2488
2351/** 2489#endif /* CONFIG_NO_HZ */
2352 * get_avenrun - get the load average array
2353 * @loads: pointer to dest load array
2354 * @offset: offset to add
2355 * @shift: shift count to shift the result left
2356 *
2357 * These values are estimates at best, so no need for locking.
2358 */
2359void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
2360{
2361 loads[0] = (avenrun[0] + offset) << shift;
2362 loads[1] = (avenrun[1] + offset) << shift;
2363 loads[2] = (avenrun[2] + offset) << shift;
2364}
2365 2490
2366/* 2491/*
2367 * calc_load - update the avenrun load estimates 10 ticks after the 2492 * calc_load - update the avenrun load estimates 10 ticks after the
@@ -2369,11 +2494,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
2369 */ 2494 */
2370void calc_global_load(unsigned long ticks) 2495void calc_global_load(unsigned long ticks)
2371{ 2496{
2372 long active; 2497 long active, delta;
2373 2498
2374 if (time_before(jiffies, calc_load_update + 10)) 2499 if (time_before(jiffies, calc_load_update + 10))
2375 return; 2500 return;
2376 2501
2502 /*
2503 * Fold the 'old' idle-delta to include all NO_HZ cpus.
2504 */
2505 delta = calc_load_fold_idle();
2506 if (delta)
2507 atomic_long_add(delta, &calc_load_tasks);
2508
2377 active = atomic_long_read(&calc_load_tasks); 2509 active = atomic_long_read(&calc_load_tasks);
2378 active = active > 0 ? active * FIXED_1 : 0; 2510 active = active > 0 ? active * FIXED_1 : 0;
2379 2511
@@ -2384,12 +2516,7 @@ void calc_global_load(unsigned long ticks)
2384 calc_load_update += LOAD_FREQ; 2516 calc_load_update += LOAD_FREQ;
2385 2517
2386 /* 2518 /*
2387 * Account one period with whatever state we found before 2519 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
2388 * folding in the nohz state and ageing the entire idle period.
2389 *
 2390 * This avoids losing a sample when we go idle between
2391 * calc_load_account_active() (10 ticks ago) and now and thus
2392 * under-accounting.
2393 */ 2520 */
2394 calc_global_nohz(); 2521 calc_global_nohz();
2395} 2522}
@@ -2406,7 +2533,6 @@ static void calc_load_account_active(struct rq *this_rq)
2406 return; 2533 return;
2407 2534
2408 delta = calc_load_fold_active(this_rq); 2535 delta = calc_load_fold_active(this_rq);
2409 delta += calc_load_fold_idle();
2410 if (delta) 2536 if (delta)
2411 atomic_long_add(delta, &calc_load_tasks); 2537 atomic_long_add(delta, &calc_load_tasks);
2412 2538
@@ -2414,6 +2540,10 @@ static void calc_load_account_active(struct rq *this_rq)
2414} 2540}
2415 2541
2416/* 2542/*
2543 * End of global load-average stuff
2544 */
2545
2546/*
2417 * The exact cpuload at various idx values, calculated at every tick would be 2547 * The exact cpuload at various idx values, calculated at every tick would be
2418 * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load 2548 * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
2419 * 2549 *
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index b44d604b35d1..b6baf370cae9 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
25static struct task_struct *pick_next_task_idle(struct rq *rq) 25static struct task_struct *pick_next_task_idle(struct rq *rq)
26{ 26{
27 schedstat_inc(rq, sched_goidle); 27 schedstat_inc(rq, sched_goidle);
28 calc_load_account_idle(rq);
29 return rq->idle; 28 return rq->idle;
30} 29}
31 30
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6d52cea7f33d..55844f24435a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void)
942 return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; 942 return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
943} 943}
944 944
945void calc_load_account_idle(struct rq *this_rq);
946
947#ifdef CONFIG_SCHED_HRTICK 945#ifdef CONFIG_SCHED_HRTICK
948 946
949/* 947/*
diff --git a/kernel/sys.c b/kernel/sys.c
index f0ec44dcd415..2d39a84cd857 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1788,7 +1788,6 @@ SYSCALL_DEFINE1(umask, int, mask)
1788#ifdef CONFIG_CHECKPOINT_RESTORE 1788#ifdef CONFIG_CHECKPOINT_RESTORE
1789static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1789static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1790{ 1790{
1791 struct vm_area_struct *vma;
1792 struct file *exe_file; 1791 struct file *exe_file;
1793 struct dentry *dentry; 1792 struct dentry *dentry;
1794 int err; 1793 int err;
@@ -1816,13 +1815,17 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1816 down_write(&mm->mmap_sem); 1815 down_write(&mm->mmap_sem);
1817 1816
1818 /* 1817 /*
1819 * Forbid mm->exe_file change if there are mapped other files. 1818 * Forbid mm->exe_file change if old file still mapped.
1820 */ 1819 */
1821 err = -EBUSY; 1820 err = -EBUSY;
1822 for (vma = mm->mmap; vma; vma = vma->vm_next) { 1821 if (mm->exe_file) {
1823 if (vma->vm_file && !path_equal(&vma->vm_file->f_path, 1822 struct vm_area_struct *vma;
1824 &exe_file->f_path)) 1823
1825 goto exit_unlock; 1824 for (vma = mm->mmap; vma; vma = vma->vm_next)
1825 if (vma->vm_file &&
1826 path_equal(&vma->vm_file->f_path,
1827 &mm->exe_file->f_path))
1828 goto exit_unlock;
1826 } 1829 }
1827 1830
1828 /* 1831 /*
@@ -1835,6 +1838,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1835 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) 1838 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
1836 goto exit_unlock; 1839 goto exit_unlock;
1837 1840
1841 err = 0;
1838 set_mm_exe_file(mm, exe_file); 1842 set_mm_exe_file(mm, exe_file);
1839exit_unlock: 1843exit_unlock:
1840 up_write(&mm->mmap_sem); 1844 up_write(&mm->mmap_sem);
@@ -2127,9 +2131,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2127 else 2131 else
2128 return -EINVAL; 2132 return -EINVAL;
2129 break; 2133 break;
2130 case PR_GET_TID_ADDRESS:
2131 error = prctl_get_tid_address(me, (int __user **)arg2);
2132 break;
2133 default: 2134 default:
2134 return -EINVAL; 2135 return -EINVAL;
2135 } 2136 }
@@ -2147,6 +2148,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2147 case PR_SET_MM: 2148 case PR_SET_MM:
2148 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2149 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2149 break; 2150 break;
2151 case PR_GET_TID_ADDRESS:
2152 error = prctl_get_tid_address(me, (int __user **)arg2);
2153 break;
2150 case PR_SET_CHILD_SUBREAPER: 2154 case PR_SET_CHILD_SUBREAPER:
2151 me->signal->is_child_subreaper = !!arg2; 2155 me->signal->is_child_subreaper = !!arg2;
2152 error = 0; 2156 error = 0;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 70b33abcc7bb..b7fbadc5c973 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -409,7 +409,9 @@ int second_overflow(unsigned long secs)
409 time_state = TIME_DEL; 409 time_state = TIME_DEL;
410 break; 410 break;
411 case TIME_INS: 411 case TIME_INS:
412 if (secs % 86400 == 0) { 412 if (!(time_status & STA_INS))
413 time_state = TIME_OK;
414 else if (secs % 86400 == 0) {
413 leap = -1; 415 leap = -1;
414 time_state = TIME_OOP; 416 time_state = TIME_OOP;
415 time_tai++; 417 time_tai++;
@@ -418,7 +420,9 @@ int second_overflow(unsigned long secs)
418 } 420 }
419 break; 421 break;
420 case TIME_DEL: 422 case TIME_DEL:
421 if ((secs + 1) % 86400 == 0) { 423 if (!(time_status & STA_DEL))
424 time_state = TIME_OK;
425 else if ((secs + 1) % 86400 == 0) {
422 leap = 1; 426 leap = 1;
423 time_tai--; 427 time_tai--;
424 time_state = TIME_WAIT; 428 time_state = TIME_WAIT;
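With the STA_INS/STA_DEL checks added above, second_overflow() now falls back to TIME_OK unless userspace has actually armed a leap second. For context, arming an insertion goes through the standard adjtimex() interface, roughly as in the sketch below (requires CAP_SYS_TIME; this is ordinary NTP-daemon behaviour, not code from the patch):

	#include <stdio.h>
	#include <sys/timex.h>

	int main(void)
	{
		struct timex tx = { .modes = 0 };

		if (adjtimex(&tx) < 0) {	/* read the current status bits */
			perror("adjtimex");
			return 1;
		}

		tx.modes = ADJ_STATUS;		/* write them back with STA_INS set */
		tx.status |= STA_INS;
		if (adjtimex(&tx) < 0) {
			perror("adjtimex");
			return 1;
		}

		printf("time_status is now 0x%x\n", tx.status);
		return 0;
	}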
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..4a08472c3ca7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -406,6 +406,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
406 */ 406 */
407 if (!ts->tick_stopped) { 407 if (!ts->tick_stopped) {
408 select_nohz_load_balancer(1); 408 select_nohz_load_balancer(1);
409 calc_load_enter_idle();
409 410
410 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); 411 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
411 ts->tick_stopped = 1; 412 ts->tick_stopped = 1;
@@ -597,6 +598,7 @@ void tick_nohz_idle_exit(void)
597 account_idle_ticks(ticks); 598 account_idle_ticks(ticks);
598#endif 599#endif
599 600
601 calc_load_exit_idle();
600 touch_softlockup_watchdog(); 602 touch_softlockup_watchdog();
601 /* 603 /*
602 * Cancel the scheduled timer and restore the tick 604 * Cancel the scheduled timer and restore the tick
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6f46a00a1e8a..3447cfaf11e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -70,6 +70,12 @@ struct timekeeper {
70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ 70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
71 struct timespec raw_time; 71 struct timespec raw_time;
72 72
73 /* Offset clock monotonic -> clock realtime */
74 ktime_t offs_real;
75
76 /* Offset clock monotonic -> clock boottime */
77 ktime_t offs_boot;
78
73 /* Seqlock for all timekeeper values */ 79 /* Seqlock for all timekeeper values */
74 seqlock_t lock; 80 seqlock_t lock;
75}; 81};
@@ -172,6 +178,14 @@ static inline s64 timekeeping_get_ns_raw(void)
172 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 178 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
173} 179}
174 180
181static void update_rt_offset(void)
182{
183 struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic;
184
185 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
186 timekeeper.offs_real = timespec_to_ktime(tmp);
187}
188
175/* must hold write on timekeeper.lock */ 189/* must hold write on timekeeper.lock */
176static void timekeeping_update(bool clearntp) 190static void timekeeping_update(bool clearntp)
177{ 191{
@@ -179,6 +193,7 @@ static void timekeeping_update(bool clearntp)
179 timekeeper.ntp_error = 0; 193 timekeeper.ntp_error = 0;
180 ntp_clear(); 194 ntp_clear();
181 } 195 }
196 update_rt_offset();
182 update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, 197 update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic,
183 timekeeper.clock, timekeeper.mult); 198 timekeeper.clock, timekeeper.mult);
184} 199}
@@ -604,6 +619,7 @@ void __init timekeeping_init(void)
604 } 619 }
605 set_normalized_timespec(&timekeeper.wall_to_monotonic, 620 set_normalized_timespec(&timekeeper.wall_to_monotonic,
606 -boot.tv_sec, -boot.tv_nsec); 621 -boot.tv_sec, -boot.tv_nsec);
622 update_rt_offset();
607 timekeeper.total_sleep_time.tv_sec = 0; 623 timekeeper.total_sleep_time.tv_sec = 0;
608 timekeeper.total_sleep_time.tv_nsec = 0; 624 timekeeper.total_sleep_time.tv_nsec = 0;
609 write_sequnlock_irqrestore(&timekeeper.lock, flags); 625 write_sequnlock_irqrestore(&timekeeper.lock, flags);
@@ -612,6 +628,12 @@ void __init timekeeping_init(void)
612/* time in seconds when suspend began */ 628/* time in seconds when suspend began */
613static struct timespec timekeeping_suspend_time; 629static struct timespec timekeeping_suspend_time;
614 630
631static void update_sleep_time(struct timespec t)
632{
633 timekeeper.total_sleep_time = t;
634 timekeeper.offs_boot = timespec_to_ktime(t);
635}
636
615/** 637/**
616 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 638 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
617 * @delta: pointer to a timespec delta value 639 * @delta: pointer to a timespec delta value
@@ -630,8 +652,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
630 timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); 652 timekeeper.xtime = timespec_add(timekeeper.xtime, *delta);
631 timekeeper.wall_to_monotonic = 653 timekeeper.wall_to_monotonic =
632 timespec_sub(timekeeper.wall_to_monotonic, *delta); 654 timespec_sub(timekeeper.wall_to_monotonic, *delta);
633 timekeeper.total_sleep_time = timespec_add( 655 update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta));
634 timekeeper.total_sleep_time, *delta);
635} 656}
636 657
637 658
@@ -696,6 +717,7 @@ static void timekeeping_resume(void)
696 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 717 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
697 timekeeper.ntp_error = 0; 718 timekeeper.ntp_error = 0;
698 timekeeping_suspended = 0; 719 timekeeping_suspended = 0;
720 timekeeping_update(false);
699 write_sequnlock_irqrestore(&timekeeper.lock, flags); 721 write_sequnlock_irqrestore(&timekeeper.lock, flags);
700 722
701 touch_softlockup_watchdog(); 723 touch_softlockup_watchdog();
@@ -963,6 +985,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
963 leap = second_overflow(timekeeper.xtime.tv_sec); 985 leap = second_overflow(timekeeper.xtime.tv_sec);
964 timekeeper.xtime.tv_sec += leap; 986 timekeeper.xtime.tv_sec += leap;
965 timekeeper.wall_to_monotonic.tv_sec -= leap; 987 timekeeper.wall_to_monotonic.tv_sec -= leap;
988 if (leap)
989 clock_was_set_delayed();
966 } 990 }
967 991
968 /* Accumulate raw time */ 992 /* Accumulate raw time */
@@ -1079,6 +1103,8 @@ static void update_wall_time(void)
1079 leap = second_overflow(timekeeper.xtime.tv_sec); 1103 leap = second_overflow(timekeeper.xtime.tv_sec);
1080 timekeeper.xtime.tv_sec += leap; 1104 timekeeper.xtime.tv_sec += leap;
1081 timekeeper.wall_to_monotonic.tv_sec -= leap; 1105 timekeeper.wall_to_monotonic.tv_sec -= leap;
1106 if (leap)
1107 clock_was_set_delayed();
1082 } 1108 }
1083 1109
1084 timekeeping_update(false); 1110 timekeeping_update(false);
@@ -1246,6 +1272,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1246 } while (read_seqretry(&timekeeper.lock, seq)); 1272 } while (read_seqretry(&timekeeper.lock, seq));
1247} 1273}
1248 1274
1275#ifdef CONFIG_HIGH_RES_TIMERS
1276/**
1277 * ktime_get_update_offsets - hrtimer helper
1278 * @offs_real: pointer to storage for monotonic -> realtime offset
1279 * @offs_boot: pointer to storage for monotonic -> boottime offset
1280 *
1281 * Returns current monotonic time and updates the offsets
 1282 * Called from hrtimer_interrupt() or retrigger_next_event()
1283 */
1284ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1285{
1286 ktime_t now;
1287 unsigned int seq;
1288 u64 secs, nsecs;
1289
1290 do {
1291 seq = read_seqbegin(&timekeeper.lock);
1292
1293 secs = timekeeper.xtime.tv_sec;
1294 nsecs = timekeeper.xtime.tv_nsec;
1295 nsecs += timekeeping_get_ns();
1296 /* If arch requires, add in gettimeoffset() */
1297 nsecs += arch_gettimeoffset();
1298
1299 *offs_real = timekeeper.offs_real;
1300 *offs_boot = timekeeper.offs_boot;
1301 } while (read_seqretry(&timekeeper.lock, seq));
1302
1303 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1304 now = ktime_sub(now, *offs_real);
1305 return now;
1306}
1307#endif
1308
1249/** 1309/**
1250 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format 1310 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
1251 */ 1311 */
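The offs_real/offs_boot fields and ktime_get_update_offsets() added above let hrtimers derive CLOCK_REALTIME and CLOCK_BOOTTIME bases from a single CLOCK_MONOTONIC read: offs_real is -wall_to_monotonic and offs_boot is total_sleep_time. A small user-space restatement of that arithmetic, with sample values invented purely for illustration:

	#include <stdio.h>

	/* realtime = monotonic + offs_real   (offs_real = -wall_to_monotonic)
	 * boottime = monotonic + offs_boot   (offs_boot = total_sleep_time)
	 * All values in nanoseconds; the numbers below are made up. */
	int main(void)
	{
		long long monotonic = 120000000000LL;	     /* 120s awake since boot */
		long long offs_real = 1340000000000000000LL; /* wall clock at boot (sample) */
		long long offs_boot = 30000000000LL;	     /* 30s spent suspended */

		printf("realtime = %lld ns\n", monotonic + offs_real);
		printf("boottime = %lld ns\n", monotonic + offs_boot);
		return 0;
	}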
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1d0f6a8a0e5e..f765465bffe4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1075,6 +1075,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1075 rb_init_page(bpage->page); 1075 rb_init_page(bpage->page);
1076 1076
1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1078 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1078 1079
1079 ret = rb_allocate_pages(cpu_buffer, nr_pages); 1080 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1080 if (ret < 0) 1081 if (ret < 0)
@@ -1346,10 +1347,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1346 * If something was added to this page, it was full 1347 * If something was added to this page, it was full
1347 * since it is not the tail page. So we deduct the 1348 * since it is not the tail page. So we deduct the
1348 * bytes consumed in ring buffer from here. 1349 * bytes consumed in ring buffer from here.
1349 * No need to update overruns, since this page is 1350 * Increment overrun to account for the lost events.
1350 * deleted from ring buffer and its entries are
1351 * already accounted for.
1352 */ 1351 */
1352 local_add(page_entries, &cpu_buffer->overrun);
1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); 1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1354 } 1354 }
1355 1355
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 49249c28690d..a7fa0702be1c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3609 .pages = pages_def, 3609 .pages = pages_def,
3610 .partial = partial_def, 3610 .partial = partial_def,
3611 .nr_pages = 0, /* This gets updated below. */ 3611 .nr_pages = 0, /* This gets updated below. */
3612 .nr_pages_max = PIPE_DEF_BUFFERS,
3612 .flags = flags, 3613 .flags = flags,
3613 .ops = &tracing_pipe_buf_ops, 3614 .ops = &tracing_pipe_buf_ops,
3614 .spd_release = tracing_spd_release_pipe, 3615 .spd_release = tracing_spd_release_pipe,
@@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3680 3681
3681 ret = splice_to_pipe(pipe, &spd); 3682 ret = splice_to_pipe(pipe, &spd);
3682out: 3683out:
3683 splice_shrink_spd(pipe, &spd); 3684 splice_shrink_spd(&spd);
3684 return ret; 3685 return ret;
3685 3686
3686out_err: 3687out_err:
@@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4231 struct splice_pipe_desc spd = { 4232 struct splice_pipe_desc spd = {
4232 .pages = pages_def, 4233 .pages = pages_def,
4233 .partial = partial_def, 4234 .partial = partial_def,
4235 .nr_pages_max = PIPE_DEF_BUFFERS,
4234 .flags = flags, 4236 .flags = flags,
4235 .ops = &buffer_pipe_buf_ops, 4237 .ops = &buffer_pipe_buf_ops,
4236 .spd_release = buffer_spd_release, 4238 .spd_release = buffer_spd_release,
@@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4318 } 4320 }
4319 4321
4320 ret = splice_to_pipe(pipe, &spd); 4322 ret = splice_to_pipe(pipe, &spd);
4321 splice_shrink_spd(pipe, &spd); 4323 splice_shrink_spd(&spd);
4322out: 4324out:
4323 return ret; 4325 return ret;
4324} 4326}