aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2012-07-05 15:10:23 -0400
committer Ingo Molnar <mingo@kernel.org> 2012-07-05 15:10:23 -0400
commit 90574ebb7e6e0f7f74636ee87315890ba88d6a4a (patch)
tree 5f60106dacbfe246f52aaaf9bac69dd4749f24a6 /kernel
parent add79461a2a7d964a00b4a2fdaf313c4cf9cf4ec (diff)
parent ce5c1fe9a9e059b5c58f0a7e2a3e687d0efac815 (diff)
Merge branch 'perf/urgent' into perf/core
Merge this branch to pick up a fixlet and to update to a more recent base.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup.c 13
-rw-r--r-- kernel/exit.c 19
-rw-r--r-- kernel/panic.c 6
-rw-r--r-- kernel/pid_namespace.c 20
-rw-r--r-- kernel/printk.c 532
-rw-r--r-- kernel/rcutree.c 16
-rw-r--r-- kernel/rcutree.h 14
-rw-r--r-- kernel/rcutree_plugin.h 165
-rw-r--r-- kernel/relay.c 5
-rw-r--r-- kernel/sys.c 6
-rw-r--r-- kernel/time/tick-sched.c 7
-rw-r--r-- kernel/trace/trace.c 6
12 files changed, 591 insertions, 218 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 72fcd3069a90..2097684cf194 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -255,12 +255,17 @@ int cgroup_lock_is_held(void)
255 255
256EXPORT_SYMBOL_GPL(cgroup_lock_is_held); 256EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
257 257
258static int css_unbias_refcnt(int refcnt)
259{
260 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
261}
262
258/* the current nr of refs, always >= 0 whether @css is deactivated or not */ 263/* the current nr of refs, always >= 0 whether @css is deactivated or not */
259static int css_refcnt(struct cgroup_subsys_state *css) 264static int css_refcnt(struct cgroup_subsys_state *css)
260{ 265{
261 int v = atomic_read(&css->refcnt); 266 int v = atomic_read(&css->refcnt);
262 267
263 return v >= 0 ? v : v - CSS_DEACT_BIAS; 268 return css_unbias_refcnt(v);
264} 269}
265 270
266/* convenient tests for these bits */ 271/* convenient tests for these bits */
@@ -4982,10 +4987,12 @@ EXPORT_SYMBOL_GPL(__css_tryget);
4982void __css_put(struct cgroup_subsys_state *css) 4987void __css_put(struct cgroup_subsys_state *css)
4983{ 4988{
4984 struct cgroup *cgrp = css->cgroup; 4989 struct cgroup *cgrp = css->cgroup;
4990 int v;
4985 4991
4986 rcu_read_lock(); 4992 rcu_read_lock();
4987 atomic_dec(&css->refcnt); 4993 v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
4988 switch (css_refcnt(css)) { 4994
4995 switch (v) {
4989 case 1: 4996 case 1:
4990 if (notify_on_release(cgrp)) { 4997 if (notify_on_release(cgrp)) {
4991 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4998 set_bit(CGRP_RELEASABLE, &cgrp->flags);
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..2f59cc334516 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,18 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
72 list_del_rcu(&p->tasks); 72 list_del_rcu(&p->tasks);
73 list_del_init(&p->sibling); 73 list_del_init(&p->sibling);
74 __this_cpu_dec(process_counts); 74 __this_cpu_dec(process_counts);
75 /*
76 * If we are the last child process in a pid namespace to be
77 * reaped, notify the reaper sleeping in zap_pid_ns_processes().
78 */
79 if (IS_ENABLED(CONFIG_PID_NS)) {
80 struct task_struct *parent = p->real_parent;
81
82 if ((task_active_pid_ns(parent)->child_reaper == parent) &&
83 list_empty(&parent->children) &&
84 (parent->flags & PF_EXITING))
85 wake_up_process(parent);
86 }
75 } 87 }
76 list_del_rcu(&p->thread_group); 88 list_del_rcu(&p->thread_group);
77} 89}
@@ -643,6 +655,7 @@ static void exit_mm(struct task_struct * tsk)
643 mm_release(tsk, mm); 655 mm_release(tsk, mm);
644 if (!mm) 656 if (!mm)
645 return; 657 return;
658 sync_mm_rss(mm);
646 /* 659 /*
647 * Serialize with any possible pending coredump. 660 * Serialize with any possible pending coredump.
648 * We must hold mmap_sem around checking core_state 661 * We must hold mmap_sem around checking core_state
@@ -719,12 +732,6 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
719 732
720 zap_pid_ns_processes(pid_ns); 733 zap_pid_ns_processes(pid_ns);
721 write_lock_irq(&tasklist_lock); 734 write_lock_irq(&tasklist_lock);
722 /*
723 * We can not clear ->child_reaper or leave it alone.
724 * There may by stealth EXIT_DEAD tasks on ->children,
725 * forget_original_parent() must move them somewhere.
726 */
727 pid_ns->child_reaper = init_pid_ns.child_reaper;
728 } else if (father->signal->has_child_subreaper) { 735 } else if (father->signal->has_child_subreaper) {
729 struct task_struct *reaper; 736 struct task_struct *reaper;
730 737
diff --git a/kernel/panic.c b/kernel/panic.c
index 8ed89a175d79..d2a5f4ecc6dd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,7 +27,7 @@
27#define PANIC_TIMER_STEP 100 27#define PANIC_TIMER_STEP 100
28#define PANIC_BLINK_SPD 18 28#define PANIC_BLINK_SPD 18
29 29
30int panic_on_oops; 30int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
31static unsigned long tainted_mask; 31static unsigned long tainted_mask;
32static int pause_on_oops; 32static int pause_on_oops;
33static int pause_on_oops_flag; 33static int pause_on_oops_flag;
@@ -108,8 +108,6 @@ void panic(const char *fmt, ...)
108 */ 108 */
109 crash_kexec(NULL); 109 crash_kexec(NULL);
110 110
111 kmsg_dump(KMSG_DUMP_PANIC);
112
113 /* 111 /*
114 * Note smp_send_stop is the usual smp shutdown function, which 112 * Note smp_send_stop is the usual smp shutdown function, which
115 * unfortunately means it may not be hardened to work in a panic 113 * unfortunately means it may not be hardened to work in a panic
@@ -117,6 +115,8 @@ void panic(const char *fmt, ...)
117 */ 115 */
118 smp_send_stop(); 116 smp_send_stop();
119 117
118 kmsg_dump(KMSG_DUMP_PANIC);
119
120 atomic_notifier_call_chain(&panic_notifier_list, 0, buf); 120 atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
121 121
122 bust_spinlocks(0); 122 bust_spinlocks(0);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 16b20e38c4a1..b3c7fd554250 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,11 +184,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
184 } 184 }
185 read_unlock(&tasklist_lock); 185 read_unlock(&tasklist_lock);
186 186
187 /* Firstly reap the EXIT_ZOMBIE children we may have. */
187 do { 188 do {
188 clear_thread_flag(TIF_SIGPENDING); 189 clear_thread_flag(TIF_SIGPENDING);
189 rc = sys_wait4(-1, NULL, __WALL, NULL); 190 rc = sys_wait4(-1, NULL, __WALL, NULL);
190 } while (rc != -ECHILD); 191 } while (rc != -ECHILD);
191 192
193 /*
194 * sys_wait4() above can't reap the TASK_DEAD children.
195 * Make sure they all go away, see __unhash_process().
196 */
197 for (;;) {
198 bool need_wait = false;
199
200 read_lock(&tasklist_lock);
201 if (!list_empty(&current->children)) {
202 __set_current_state(TASK_UNINTERRUPTIBLE);
203 need_wait = true;
204 }
205 read_unlock(&tasklist_lock);
206
207 if (!need_wait)
208 break;
209 schedule();
210 }
211
192 if (pid_ns->reboot) 212 if (pid_ns->reboot)
193 current->signal->group_exit_code = pid_ns->reboot; 213 current->signal->group_exit_code = pid_ns->reboot;
194 214
diff --git a/kernel/printk.c b/kernel/printk.c
index 32462d2b364a..dba18211685e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -193,12 +193,19 @@ static int console_may_schedule;
193 * separated by ',', and find the message after the ';' character. 193 * separated by ',', and find the message after the ';' character.
194 */ 194 */
195 195
196enum log_flags {
197 LOG_DEFAULT = 0,
198 LOG_NOCONS = 1, /* already flushed, do not print to console */
199};
200
196struct log { 201struct log {
197 u64 ts_nsec; /* timestamp in nanoseconds */ 202 u64 ts_nsec; /* timestamp in nanoseconds */
198 u16 len; /* length of entire record */ 203 u16 len; /* length of entire record */
199 u16 text_len; /* length of text buffer */ 204 u16 text_len; /* length of text buffer */
200 u16 dict_len; /* length of dictionary buffer */ 205 u16 dict_len; /* length of dictionary buffer */
201 u16 level; /* syslog level + facility */ 206 u8 facility; /* syslog facility */
207 u8 flags:5; /* internal record flags */
208 u8 level:3; /* syslog level */
202}; 209};
203 210
204/* 211/*
@@ -227,10 +234,10 @@ static u32 clear_idx;
227#define LOG_LINE_MAX 1024 234#define LOG_LINE_MAX 1024
228 235
229/* record buffer */ 236/* record buffer */
230#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 237#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
231#define LOG_ALIGN 4 238#define LOG_ALIGN 4
232#else 239#else
233#define LOG_ALIGN 8 240#define LOG_ALIGN __alignof__(struct log)
234#endif 241#endif
235#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 242#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
236static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 243static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
286 293
287/* insert record into the buffer, discard old ones, update heads */ 294/* insert record into the buffer, discard old ones, update heads */
288static void log_store(int facility, int level, 295static void log_store(int facility, int level,
296 enum log_flags flags, u64 ts_nsec,
289 const char *dict, u16 dict_len, 297 const char *dict, u16 dict_len,
290 const char *text, u16 text_len) 298 const char *text, u16 text_len)
291{ 299{
@@ -329,8 +337,13 @@ static void log_store(int facility, int level,
329 msg->text_len = text_len; 337 msg->text_len = text_len;
330 memcpy(log_dict(msg), dict, dict_len); 338 memcpy(log_dict(msg), dict, dict_len);
331 msg->dict_len = dict_len; 339 msg->dict_len = dict_len;
332 msg->level = (facility << 3) | (level & 7); 340 msg->facility = facility;
333 msg->ts_nsec = local_clock(); 341 msg->level = level & 7;
342 msg->flags = flags & 0x1f;
343 if (ts_nsec > 0)
344 msg->ts_nsec = ts_nsec;
345 else
346 msg->ts_nsec = local_clock();
334 memset(log_dict(msg) + dict_len, 0, pad_len); 347 memset(log_dict(msg) + dict_len, 0, pad_len);
335 msg->len = sizeof(struct log) + text_len + dict_len + pad_len; 348 msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
336 349
@@ -414,7 +427,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
414 if (!user) 427 if (!user)
415 return -EBADF; 428 return -EBADF;
416 429
417 mutex_lock(&user->lock); 430 ret = mutex_lock_interruptible(&user->lock);
431 if (ret)
432 return ret;
418 raw_spin_lock(&logbuf_lock); 433 raw_spin_lock(&logbuf_lock);
419 while (user->seq == log_next_seq) { 434 while (user->seq == log_next_seq) {
420 if (file->f_flags & O_NONBLOCK) { 435 if (file->f_flags & O_NONBLOCK) {
@@ -444,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
444 ts_usec = msg->ts_nsec; 459 ts_usec = msg->ts_nsec;
445 do_div(ts_usec, 1000); 460 do_div(ts_usec, 1000);
446 len = sprintf(user->buf, "%u,%llu,%llu;", 461 len = sprintf(user->buf, "%u,%llu,%llu;",
447 msg->level, user->seq, ts_usec); 462 (msg->facility << 3) | msg->level, user->seq, ts_usec);
448 463
449 /* escape non-printable characters */ 464 /* escape non-printable characters */
450 for (i = 0; i < msg->text_len; i++) { 465 for (i = 0; i < msg->text_len; i++) {
@@ -785,6 +800,21 @@ static bool printk_time;
785#endif 800#endif
786module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); 801module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
787 802
803static size_t print_time(u64 ts, char *buf)
804{
805 unsigned long rem_nsec;
806
807 if (!printk_time)
808 return 0;
809
810 if (!buf)
811 return 15;
812
813 rem_nsec = do_div(ts, 1000000000);
814 return sprintf(buf, "[%5lu.%06lu] ",
815 (unsigned long)ts, rem_nsec / 1000);
816}
817
788static size_t print_prefix(const struct log *msg, bool syslog, char *buf) 818static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
789{ 819{
790 size_t len = 0; 820 size_t len = 0;
@@ -801,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
801 } 831 }
802 } 832 }
803 833
804 if (printk_time) { 834 len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
805 if (buf) {
806 unsigned long long ts = msg->ts_nsec;
807 unsigned long rem_nsec = do_div(ts, 1000000000);
808
809 len += sprintf(buf + len, "[%5lu.%06lu] ",
810 (unsigned long) ts, rem_nsec / 1000);
811 } else {
812 len += 15;
813 }
814 }
815
816 return len; 835 return len;
817} 836}
818 837
@@ -860,26 +879,49 @@ static int syslog_print(char __user *buf, int size)
860{ 879{
861 char *text; 880 char *text;
862 struct log *msg; 881 struct log *msg;
863 int len; 882 int len = 0;
864 883
865 text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); 884 text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
866 if (!text) 885 if (!text)
867 return -ENOMEM; 886 return -ENOMEM;
868 887
869 raw_spin_lock_irq(&logbuf_lock); 888 while (size > 0) {
870 if (syslog_seq < log_first_seq) { 889 size_t n;
871 /* messages are gone, move to first one */ 890
872 syslog_seq = log_first_seq; 891 raw_spin_lock_irq(&logbuf_lock);
873 syslog_idx = log_first_idx; 892 if (syslog_seq < log_first_seq) {
874 } 893 /* messages are gone, move to first one */
875 msg = log_from_idx(syslog_idx); 894 syslog_seq = log_first_seq;
876 len = msg_print_text(msg, true, text, LOG_LINE_MAX); 895 syslog_idx = log_first_idx;
877 syslog_idx = log_next(syslog_idx); 896 }
878 syslog_seq++; 897 if (syslog_seq == log_next_seq) {
879 raw_spin_unlock_irq(&logbuf_lock); 898 raw_spin_unlock_irq(&logbuf_lock);
899 break;
900 }
901 msg = log_from_idx(syslog_idx);
902 n = msg_print_text(msg, true, text, LOG_LINE_MAX);
903 if (n <= size) {
904 syslog_idx = log_next(syslog_idx);
905 syslog_seq++;
906 } else
907 n = 0;
908 raw_spin_unlock_irq(&logbuf_lock);
909
910 if (!n)
911 break;
912
913 len += n;
914 size -= n;
915 buf += n;
916 n = copy_to_user(buf - n, text, n);
880 917
881 if (len > 0 && copy_to_user(buf, text, len)) 918 if (n) {
882 len = -EFAULT; 919 len -= n;
920 if (!len)
921 len = -EFAULT;
922 break;
923 }
924 }
883 925
884 kfree(text); 926 kfree(text);
885 return len; 927 return len;
@@ -909,7 +951,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
909 /* 951 /*
910 * Find first record that fits, including all following records, 952 * Find first record that fits, including all following records,
911 * into the user-provided buffer for this dump. 953 * into the user-provided buffer for this dump.
912 */ 954 */
913 seq = clear_seq; 955 seq = clear_seq;
914 idx = clear_idx; 956 idx = clear_idx;
915 while (seq < log_next_seq) { 957 while (seq < log_next_seq) {
@@ -919,6 +961,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
919 idx = log_next(idx); 961 idx = log_next(idx);
920 seq++; 962 seq++;
921 } 963 }
964
965 /* move first record forward until length fits into the buffer */
922 seq = clear_seq; 966 seq = clear_seq;
923 idx = clear_idx; 967 idx = clear_idx;
924 while (len > size && seq < log_next_seq) { 968 while (len > size && seq < log_next_seq) {
@@ -929,7 +973,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
929 seq++; 973 seq++;
930 } 974 }
931 975
932 /* last message in this dump */ 976 /* last message fitting into this dump */
933 next_seq = log_next_seq; 977 next_seq = log_next_seq;
934 978
935 len = 0; 979 len = 0;
@@ -974,6 +1018,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
974{ 1018{
975 bool clear = false; 1019 bool clear = false;
976 static int saved_console_loglevel = -1; 1020 static int saved_console_loglevel = -1;
1021 static DEFINE_MUTEX(syslog_mutex);
977 int error; 1022 int error;
978 1023
979 error = check_syslog_permissions(type, from_file); 1024 error = check_syslog_permissions(type, from_file);
@@ -1000,11 +1045,17 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1000 error = -EFAULT; 1045 error = -EFAULT;
1001 goto out; 1046 goto out;
1002 } 1047 }
1048 error = mutex_lock_interruptible(&syslog_mutex);
1049 if (error)
1050 goto out;
1003 error = wait_event_interruptible(log_wait, 1051 error = wait_event_interruptible(log_wait,
1004 syslog_seq != log_next_seq); 1052 syslog_seq != log_next_seq);
1005 if (error) 1053 if (error) {
1054 mutex_unlock(&syslog_mutex);
1006 goto out; 1055 goto out;
1056 }
1007 error = syslog_print(buf, len); 1057 error = syslog_print(buf, len);
1058 mutex_unlock(&syslog_mutex);
1008 break; 1059 break;
1009 /* Read/clear last kernel messages */ 1060 /* Read/clear last kernel messages */
1010 case SYSLOG_ACTION_READ_CLEAR: 1061 case SYSLOG_ACTION_READ_CLEAR:
@@ -1027,6 +1078,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1027 /* Clear ring buffer */ 1078 /* Clear ring buffer */
1028 case SYSLOG_ACTION_CLEAR: 1079 case SYSLOG_ACTION_CLEAR:
1029 syslog_print_all(NULL, 0, true); 1080 syslog_print_all(NULL, 0, true);
1081 break;
1030 /* Disable logging to console */ 1082 /* Disable logging to console */
1031 case SYSLOG_ACTION_CONSOLE_OFF: 1083 case SYSLOG_ACTION_CONSOLE_OFF:
1032 if (saved_console_loglevel == -1) 1084 if (saved_console_loglevel == -1)
@@ -1259,15 +1311,92 @@ static inline void printk_delay(void)
1259 } 1311 }
1260} 1312}
1261 1313
1314/*
1315 * Continuation lines are buffered, and not committed to the record buffer
1316 * until the line is complete, or a race forces it. The line fragments
1317 * though, are printed immediately to the consoles to ensure everything has
1318 * reached the console in case of a kernel crash.
1319 */
1320static struct cont {
1321 char buf[LOG_LINE_MAX];
1322 size_t len; /* length == 0 means unused buffer */
1323 size_t cons; /* bytes written to console */
1324 struct task_struct *owner; /* task of first print*/
1325 u64 ts_nsec; /* time of first print */
1326 u8 level; /* log level of first message */
1327 u8 facility; /* log facility of first message */
1328 bool flushed:1; /* buffer sealed and committed */
1329} cont;
1330
1331static void cont_flush(void)
1332{
1333 if (cont.flushed)
1334 return;
1335 if (cont.len == 0)
1336 return;
1337
1338 log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
1339 NULL, 0, cont.buf, cont.len);
1340
1341 cont.flushed = true;
1342}
1343
1344static bool cont_add(int facility, int level, const char *text, size_t len)
1345{
1346 if (cont.len && cont.flushed)
1347 return false;
1348
1349 if (cont.len + len > sizeof(cont.buf)) {
1350 cont_flush();
1351 return false;
1352 }
1353
1354 if (!cont.len) {
1355 cont.facility = facility;
1356 cont.level = level;
1357 cont.owner = current;
1358 cont.ts_nsec = local_clock();
1359 cont.cons = 0;
1360 cont.flushed = false;
1361 }
1362
1363 memcpy(cont.buf + cont.len, text, len);
1364 cont.len += len;
1365 return true;
1366}
1367
1368static size_t cont_print_text(char *text, size_t size)
1369{
1370 size_t textlen = 0;
1371 size_t len;
1372
1373 if (cont.cons == 0) {
1374 textlen += print_time(cont.ts_nsec, text);
1375 size -= textlen;
1376 }
1377
1378 len = cont.len - cont.cons;
1379 if (len > 0) {
1380 if (len+1 > size)
1381 len = size-1;
1382 memcpy(text + textlen, cont.buf + cont.cons, len);
1383 textlen += len;
1384 cont.cons = cont.len;
1385 }
1386
1387 if (cont.flushed) {
1388 text[textlen++] = '\n';
1389 /* got everything, release buffer */
1390 cont.len = 0;
1391 }
1392 return textlen;
1393}
1394
1262asmlinkage int vprintk_emit(int facility, int level, 1395asmlinkage int vprintk_emit(int facility, int level,
1263 const char *dict, size_t dictlen, 1396 const char *dict, size_t dictlen,
1264 const char *fmt, va_list args) 1397 const char *fmt, va_list args)
1265{ 1398{
1266 static int recursion_bug; 1399 static int recursion_bug;
1267 static char cont_buf[LOG_LINE_MAX];
1268 static size_t cont_len;
1269 static int cont_level;
1270 static struct task_struct *cont_task;
1271 static char textbuf[LOG_LINE_MAX]; 1400 static char textbuf[LOG_LINE_MAX];
1272 char *text = textbuf; 1401 char *text = textbuf;
1273 size_t text_len; 1402 size_t text_len;
@@ -1313,7 +1442,8 @@ asmlinkage int vprintk_emit(int facility, int level,
1313 recursion_bug = 0; 1442 recursion_bug = 0;
1314 printed_len += strlen(recursion_msg); 1443 printed_len += strlen(recursion_msg);
1315 /* emit KERN_CRIT message */ 1444 /* emit KERN_CRIT message */
1316 log_store(0, 2, NULL, 0, recursion_msg, printed_len); 1445 log_store(0, 2, LOG_DEFAULT, 0,
1446 NULL, 0, recursion_msg, printed_len);
1317 } 1447 }
1318 1448
1319 /* 1449 /*
@@ -1351,55 +1481,37 @@ asmlinkage int vprintk_emit(int facility, int level,
1351 } 1481 }
1352 1482
1353 if (!newline) { 1483 if (!newline) {
1354 if (cont_len && (prefix || cont_task != current)) { 1484 /*
1355 /* 1485 * Flush the conflicting buffer. An earlier newline was missing,
1356 * Flush earlier buffer, which is either from a 1486 * or another task also prints continuation lines.
1357 * different thread, or when we got a new prefix. 1487 */
1358 */ 1488 if (cont.len && (prefix || cont.owner != current))
1359 log_store(facility, cont_level, NULL, 0, cont_buf, cont_len); 1489 cont_flush();
1360 cont_len = 0;
1361 }
1362
1363 if (!cont_len) {
1364 cont_level = level;
1365 cont_task = current;
1366 }
1367 1490
1368 /* buffer or append to earlier buffer from the same thread */ 1491 /* buffer line if possible, otherwise store it right away */
1369 if (cont_len + text_len > sizeof(cont_buf)) 1492 if (!cont_add(facility, level, text, text_len))
1370 text_len = sizeof(cont_buf) - cont_len; 1493 log_store(facility, level, LOG_DEFAULT, 0,
1371 memcpy(cont_buf + cont_len, text, text_len); 1494 dict, dictlen, text, text_len);
1372 cont_len += text_len;
1373 } else { 1495 } else {
1374 if (cont_len && cont_task == current) { 1496 bool stored = false;
1375 if (prefix) {
1376 /*
1377 * New prefix from the same thread; flush. We
1378 * either got no earlier newline, or we race
1379 * with an interrupt.
1380 */
1381 log_store(facility, cont_level,
1382 NULL, 0, cont_buf, cont_len);
1383 cont_len = 0;
1384 }
1385 1497
1386 /* append to the earlier buffer and flush */ 1498 /*
1387 if (cont_len + text_len > sizeof(cont_buf)) 1499 * If an earlier newline was missing and it was the same task,
1388 text_len = sizeof(cont_buf) - cont_len; 1500 * either merge it with the current buffer and flush, or if
1389 memcpy(cont_buf + cont_len, text, text_len); 1501 * there was a race with interrupts (prefix == true) then just
1390 cont_len += text_len; 1502 * flush it out and store this line separately.
1391 log_store(facility, cont_level, 1503 */
1392 NULL, 0, cont_buf, cont_len); 1504 if (cont.len && cont.owner == current) {
1393 cont_len = 0; 1505 if (!prefix)
1394 cont_task = NULL; 1506 stored = cont_add(facility, level, text, text_len);
1395 printed_len = cont_len; 1507 cont_flush();
1396 } else {
1397 /* ordinary single and terminated line */
1398 log_store(facility, level,
1399 dict, dictlen, text, text_len);
1400 printed_len = text_len;
1401 } 1508 }
1509
1510 if (!stored)
1511 log_store(facility, level, LOG_DEFAULT, 0,
1512 dict, dictlen, text, text_len);
1402 } 1513 }
1514 printed_len += text_len;
1403 1515
1404 /* 1516 /*
1405 * Try to acquire and then immediately release the console semaphore. 1517 * Try to acquire and then immediately release the console semaphore.
@@ -1486,11 +1598,18 @@ EXPORT_SYMBOL(printk);
1486#else 1598#else
1487 1599
1488#define LOG_LINE_MAX 0 1600#define LOG_LINE_MAX 0
1601static struct cont {
1602 size_t len;
1603 size_t cons;
1604 u8 level;
1605 bool flushed:1;
1606} cont;
1489static struct log *log_from_idx(u32 idx) { return NULL; } 1607static struct log *log_from_idx(u32 idx) { return NULL; }
1490static u32 log_next(u32 idx) { return 0; } 1608static u32 log_next(u32 idx) { return 0; }
1491static void call_console_drivers(int level, const char *text, size_t len) {} 1609static void call_console_drivers(int level, const char *text, size_t len) {}
1492static size_t msg_print_text(const struct log *msg, bool syslog, 1610static size_t msg_print_text(const struct log *msg, bool syslog,
1493 char *buf, size_t size) { return 0; } 1611 char *buf, size_t size) { return 0; }
1612static size_t cont_print_text(char *text, size_t size) { return 0; }
1494 1613
1495#endif /* CONFIG_PRINTK */ 1614#endif /* CONFIG_PRINTK */
1496 1615
@@ -1782,6 +1901,7 @@ static u32 console_idx;
1782 */ 1901 */
1783void console_unlock(void) 1902void console_unlock(void)
1784{ 1903{
1904 static char text[LOG_LINE_MAX];
1785 static u64 seen_seq; 1905 static u64 seen_seq;
1786 unsigned long flags; 1906 unsigned long flags;
1787 bool wake_klogd = false; 1907 bool wake_klogd = false;
@@ -1794,10 +1914,23 @@ void console_unlock(void)
1794 1914
1795 console_may_schedule = 0; 1915 console_may_schedule = 0;
1796 1916
1917 /* flush buffered message fragment immediately to console */
1918 raw_spin_lock_irqsave(&logbuf_lock, flags);
1919 if (cont.len && (cont.cons < cont.len || cont.flushed)) {
1920 size_t len;
1921
1922 len = cont_print_text(text, sizeof(text));
1923 raw_spin_unlock(&logbuf_lock);
1924 stop_critical_timings();
1925 call_console_drivers(cont.level, text, len);
1926 start_critical_timings();
1927 local_irq_restore(flags);
1928 } else
1929 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
1930
1797again: 1931again:
1798 for (;;) { 1932 for (;;) {
1799 struct log *msg; 1933 struct log *msg;
1800 static char text[LOG_LINE_MAX];
1801 size_t len; 1934 size_t len;
1802 int level; 1935 int level;
1803 1936
@@ -1812,13 +1945,22 @@ again:
1812 console_seq = log_first_seq; 1945 console_seq = log_first_seq;
1813 console_idx = log_first_idx; 1946 console_idx = log_first_idx;
1814 } 1947 }
1815 1948skip:
1816 if (console_seq == log_next_seq) 1949 if (console_seq == log_next_seq)
1817 break; 1950 break;
1818 1951
1819 msg = log_from_idx(console_idx); 1952 msg = log_from_idx(console_idx);
1820 level = msg->level & 7; 1953 if (msg->flags & LOG_NOCONS) {
1954 /*
1955 * Skip record we have buffered and already printed
1956 * directly to the console when we received it.
1957 */
1958 console_idx = log_next(console_idx);
1959 console_seq++;
1960 goto skip;
1961 }
1821 1962
1963 level = msg->level;
1822 len = msg_print_text(msg, false, text, sizeof(text)); 1964 len = msg_print_text(msg, false, text, sizeof(text));
1823 1965
1824 console_idx = log_next(console_idx); 1966 console_idx = log_next(console_idx);
@@ -2300,48 +2442,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
2300 * kmsg_dump - dump kernel log to kernel message dumpers. 2442 * kmsg_dump - dump kernel log to kernel message dumpers.
2301 * @reason: the reason (oops, panic etc) for dumping 2443 * @reason: the reason (oops, panic etc) for dumping
2302 * 2444 *
2303 * Iterate through each of the dump devices and call the oops/panic 2445 * Call each of the registered dumper's dump() callback, which can
2304 * callbacks with the log buffer. 2446 * retrieve the kmsg records with kmsg_dump_get_line() or
2447 * kmsg_dump_get_buffer().
2305 */ 2448 */
2306void kmsg_dump(enum kmsg_dump_reason reason) 2449void kmsg_dump(enum kmsg_dump_reason reason)
2307{ 2450{
2308 u64 idx;
2309 struct kmsg_dumper *dumper; 2451 struct kmsg_dumper *dumper;
2310 const char *s1, *s2;
2311 unsigned long l1, l2;
2312 unsigned long flags; 2452 unsigned long flags;
2313 2453
2314 if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) 2454 if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
2315 return; 2455 return;
2316 2456
2317 /* Theoretically, the log could move on after we do this, but 2457 rcu_read_lock();
2318 there's not a lot we can do about that. The new messages 2458 list_for_each_entry_rcu(dumper, &dump_list, list) {
2319 will overwrite the start of what we dump. */ 2459 if (dumper->max_reason && reason > dumper->max_reason)
2460 continue;
2461
2462 /* initialize iterator with data about the stored records */
2463 dumper->active = true;
2464
2465 raw_spin_lock_irqsave(&logbuf_lock, flags);
2466 dumper->cur_seq = clear_seq;
2467 dumper->cur_idx = clear_idx;
2468 dumper->next_seq = log_next_seq;
2469 dumper->next_idx = log_next_idx;
2470 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2471
2472 /* invoke dumper which will iterate over records */
2473 dumper->dump(dumper, reason);
2474
2475 /* reset iterator */
2476 dumper->active = false;
2477 }
2478 rcu_read_unlock();
2479}
2480
2481/**
2482 * kmsg_dump_get_line - retrieve one kmsg log line
2483 * @dumper: registered kmsg dumper
2484 * @syslog: include the "<4>" prefixes
2485 * @line: buffer to copy the line to
2486 * @size: maximum size of the buffer
2487 * @len: length of line placed into buffer
2488 *
2489 * Start at the beginning of the kmsg buffer, with the oldest kmsg
2490 * record, and copy one record into the provided buffer.
2491 *
2492 * Consecutive calls will return the next available record moving
2493 * towards the end of the buffer with the youngest messages.
2494 *
2495 * A return value of FALSE indicates that there are no more records to
2496 * read.
2497 */
2498bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
2499 char *line, size_t size, size_t *len)
2500{
2501 unsigned long flags;
2502 struct log *msg;
2503 size_t l = 0;
2504 bool ret = false;
2505
2506 if (!dumper->active)
2507 goto out;
2320 2508
2321 raw_spin_lock_irqsave(&logbuf_lock, flags); 2509 raw_spin_lock_irqsave(&logbuf_lock, flags);
2322 if (syslog_seq < log_first_seq) 2510 if (dumper->cur_seq < log_first_seq) {
2323 idx = syslog_idx; 2511 /* messages are gone, move to first available one */
2324 else 2512 dumper->cur_seq = log_first_seq;
2325 idx = log_first_idx; 2513 dumper->cur_idx = log_first_idx;
2514 }
2326 2515
2327 if (idx > log_next_idx) { 2516 /* last entry */
2328 s1 = log_buf; 2517 if (dumper->cur_seq >= log_next_seq) {
2329 l1 = log_next_idx; 2518 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2519 goto out;
2520 }
2330 2521
2331 s2 = log_buf + idx; 2522 msg = log_from_idx(dumper->cur_idx);
2332 l2 = log_buf_len - idx; 2523 l = msg_print_text(msg, syslog,
2333 } else { 2524 line, size);
2334 s1 = "";
2335 l1 = 0;
2336 2525
2337 s2 = log_buf + idx; 2526 dumper->cur_idx = log_next(dumper->cur_idx);
2338 l2 = log_next_idx - idx; 2527 dumper->cur_seq++;
2528 ret = true;
2529 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2530out:
2531 if (len)
2532 *len = l;
2533 return ret;
2534}
2535EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
2536
2537/**
2538 * kmsg_dump_get_buffer - copy kmsg log lines
2539 * @dumper: registered kmsg dumper
2540 * @syslog: include the "<4>" prefixes
2541 * @buf: buffer to copy the line to
2542 * @size: maximum size of the buffer
2543 * @len: length of line placed into buffer
2544 *
2545 * Start at the end of the kmsg buffer and fill the provided buffer
2546 * with as many of the *youngest* kmsg records that fit into it.
2547 * If the buffer is large enough, all available kmsg records will be
2548 * copied with a single call.
2549 *
2550 * Consecutive calls will fill the buffer with the next block of
2551 * available older records, not including the earlier retrieved ones.
2552 *
2553 * A return value of FALSE indicates that there are no more records to
2554 * read.
2555 */
2556bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2557 char *buf, size_t size, size_t *len)
2558{
2559 unsigned long flags;
2560 u64 seq;
2561 u32 idx;
2562 u64 next_seq;
2563 u32 next_idx;
2564 size_t l = 0;
2565 bool ret = false;
2566
2567 if (!dumper->active)
2568 goto out;
2569
2570 raw_spin_lock_irqsave(&logbuf_lock, flags);
2571 if (dumper->cur_seq < log_first_seq) {
2572 /* messages are gone, move to first available one */
2573 dumper->cur_seq = log_first_seq;
2574 dumper->cur_idx = log_first_idx;
2339 } 2575 }
2576
2577 /* last entry */
2578 if (dumper->cur_seq >= dumper->next_seq) {
2579 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2580 goto out;
2581 }
2582
2583 /* calculate length of entire buffer */
2584 seq = dumper->cur_seq;
2585 idx = dumper->cur_idx;
2586 while (seq < dumper->next_seq) {
2587 struct log *msg = log_from_idx(idx);
2588
2589 l += msg_print_text(msg, true, NULL, 0);
2590 idx = log_next(idx);
2591 seq++;
2592 }
2593
2594 /* move first record forward until length fits into the buffer */
2595 seq = dumper->cur_seq;
2596 idx = dumper->cur_idx;
2597 while (l > size && seq < dumper->next_seq) {
2598 struct log *msg = log_from_idx(idx);
2599
2600 l -= msg_print_text(msg, true, NULL, 0);
2601 idx = log_next(idx);
2602 seq++;
2603 }
2604
2605 /* last message in next iteration */
2606 next_seq = seq;
2607 next_idx = idx;
2608
2609 l = 0;
2610 while (seq < dumper->next_seq) {
2611 struct log *msg = log_from_idx(idx);
2612
2613 l += msg_print_text(msg, syslog,
2614 buf + l, size - l);
2615
2616 idx = log_next(idx);
2617 seq++;
2618 }
2619
2620 dumper->next_seq = next_seq;
2621 dumper->next_idx = next_idx;
2622 ret = true;
2340 raw_spin_unlock_irqrestore(&logbuf_lock, flags); 2623 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2624out:
2625 if (len)
2626 *len = l;
2627 return ret;
2628}
2629EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
2341 2630
2342 rcu_read_lock(); 2631/**
2343 list_for_each_entry_rcu(dumper, &dump_list, list) 2632 * kmsg_dump_rewind - reset the iterator
2344 dumper->dump(dumper, reason, s1, l1, s2, l2); 2633 * @dumper: registered kmsg dumper
2345 rcu_read_unlock(); 2634 *
2635 * Reset the dumper's iterator so that kmsg_dump_get_line() and
2636 * kmsg_dump_get_buffer() can be called again and used multiple
2637 * times within the same dumper.dump() callback.
2638 */
2639void kmsg_dump_rewind(struct kmsg_dumper *dumper)
2640{
2641 unsigned long flags;
2642
2643 raw_spin_lock_irqsave(&logbuf_lock, flags);
2644 dumper->cur_seq = clear_seq;
2645 dumper->cur_idx = clear_idx;
2646 dumper->next_seq = log_next_seq;
2647 dumper->next_idx = log_next_idx;
2648 raw_spin_unlock_irqrestore(&logbuf_lock, flags);
2346} 2649}
2650EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
2347#endif 2651#endif
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0da7b88d92d0..38ecdda3f55f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1397,6 +1397,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1397 rdp->qlen_lazy += rsp->qlen_lazy; 1397 rdp->qlen_lazy += rsp->qlen_lazy;
1398 rdp->qlen += rsp->qlen; 1398 rdp->qlen += rsp->qlen;
1399 rdp->n_cbs_adopted += rsp->qlen; 1399 rdp->n_cbs_adopted += rsp->qlen;
1400 if (rsp->qlen_lazy != rsp->qlen)
1401 rcu_idle_count_callbacks_posted();
1400 rsp->qlen_lazy = 0; 1402 rsp->qlen_lazy = 0;
1401 rsp->qlen = 0; 1403 rsp->qlen = 0;
1402 1404
@@ -1528,7 +1530,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1528{ 1530{
1529 unsigned long flags; 1531 unsigned long flags;
1530 struct rcu_head *next, *list, **tail; 1532 struct rcu_head *next, *list, **tail;
1531 int bl, count, count_lazy; 1533 int bl, count, count_lazy, i;
1532 1534
1533 /* If no callbacks are ready, just return.*/ 1535 /* If no callbacks are ready, just return.*/
1534 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1536 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1551,9 +1553,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1551 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1553 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1552 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1554 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1553 tail = rdp->nxttail[RCU_DONE_TAIL]; 1555 tail = rdp->nxttail[RCU_DONE_TAIL];
1554 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) 1556 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
1555 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) 1557 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1556 rdp->nxttail[count] = &rdp->nxtlist; 1558 rdp->nxttail[i] = &rdp->nxtlist;
1557 local_irq_restore(flags); 1559 local_irq_restore(flags);
1558 1560
1559 /* Invoke callbacks. */ 1561 /* Invoke callbacks. */
@@ -1581,9 +1583,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1581 if (list != NULL) { 1583 if (list != NULL) {
1582 *tail = rdp->nxtlist; 1584 *tail = rdp->nxtlist;
1583 rdp->nxtlist = list; 1585 rdp->nxtlist = list;
1584 for (count = 0; count < RCU_NEXT_SIZE; count++) 1586 for (i = 0; i < RCU_NEXT_SIZE; i++)
1585 if (&rdp->nxtlist == rdp->nxttail[count]) 1587 if (&rdp->nxtlist == rdp->nxttail[i])
1586 rdp->nxttail[count] = tail; 1588 rdp->nxttail[i] = tail;
1587 else 1589 else
1588 break; 1590 break;
1589 } 1591 }
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7f5d138dedf5..ea056495783e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,6 +84,20 @@ struct rcu_dynticks {
84 /* Process level is worth LLONG_MAX/2. */ 84 /* Process level is worth LLONG_MAX/2. */
85 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 85 int dynticks_nmi_nesting; /* Track NMI nesting level. */
86 atomic_t dynticks; /* Even value for idle, else odd. */ 86 atomic_t dynticks; /* Even value for idle, else odd. */
87#ifdef CONFIG_RCU_FAST_NO_HZ
88 int dyntick_drain; /* Prepare-for-idle state variable. */
89 unsigned long dyntick_holdoff;
90 /* No retries for the jiffy of failure. */
91 struct timer_list idle_gp_timer;
92 /* Wake up CPU sleeping with callbacks. */
93 unsigned long idle_gp_timer_expires;
94 /* When to wake up CPU (for repost). */
95 bool idle_first_pass; /* First pass of attempt to go idle? */
96 unsigned long nonlazy_posted;
97 /* # times non-lazy CBs posted to CPU. */
98 unsigned long nonlazy_posted_snap;
99 /* idle-period nonlazy_posted snapshot. */
100#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
87}; 101};
88 102
89/* RCU's kthread states for tracing. */ 103/* RCU's kthread states for tracing. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 2411000d9869..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
1886 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1886 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1887 * any flavor of RCU. 1887 * any flavor of RCU.
1888 */ 1888 */
1889int rcu_needs_cpu(int cpu) 1889int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1890{ 1890{
1891 *delta_jiffies = ULONG_MAX;
1891 return rcu_cpu_has_callbacks(cpu); 1892 return rcu_cpu_has_callbacks(cpu);
1892} 1893}
1893 1894
@@ -1962,41 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void)
1962#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ 1963#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
1963#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1964#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1964 1965
1965/* Loop counter for rcu_prepare_for_idle(). */
1966static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1967/* If rcu_dyntick_holdoff==jiffies, don't try to enter dyntick-idle mode. */
1968static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1969/* Timer to awaken the CPU if it enters dyntick-idle mode with callbacks. */
1970static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer);
1971/* Scheduled expiry time for rcu_idle_gp_timer to allow reposting. */
1972static DEFINE_PER_CPU(unsigned long, rcu_idle_gp_timer_expires);
1973/* Enable special processing on first attempt to enter dyntick-idle mode. */
1974static DEFINE_PER_CPU(bool, rcu_idle_first_pass);
1975/* Running count of non-lazy callbacks posted, never decremented. */
1976static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted);
1977/* Snapshot of rcu_nonlazy_posted to detect meaningful exits from idle. */
1978static DEFINE_PER_CPU(unsigned long, rcu_nonlazy_posted_snap);
1979
1980/*
1981 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
1982 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1983 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1984 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
1985 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
1986 * it is better to incur scheduling-clock interrupts than to spin
1987 * continuously for the same time duration!
1988 */
1989int rcu_needs_cpu(int cpu)
1990{
1991 /* Flag a new idle sojourn to the idle-entry state machine. */
1992 per_cpu(rcu_idle_first_pass, cpu) = 1;
1993 /* If no callbacks, RCU doesn't need the CPU. */
1994 if (!rcu_cpu_has_callbacks(cpu))
1995 return 0;
1996 /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
1997 return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
1998}
1999
2000/* 1966/*
2001 * Does the specified flavor of RCU have non-lazy callbacks pending on 1967 * Does the specified flavor of RCU have non-lazy callbacks pending on
2002 * the specified CPU? Both RCU flavor and CPU are specified by the 1968 * the specified CPU? Both RCU flavor and CPU are specified by the
@@ -2040,6 +2006,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
2040} 2006}
2041 2007
2042/* 2008/*
2009 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
2010 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
2011 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
2012 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
2013 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
2014 * it is better to incur scheduling-clock interrupts than to spin
2015 * continuously for the same time duration!
2016 *
2017 * The delta_jiffies argument is used to store the time when RCU is
2018 * going to need the CPU again if it still has callbacks. The reason
2019 * for this is that rcu_prepare_for_idle() might need to post a timer,
2020 * but if so, it will do so after tick_nohz_stop_sched_tick() has set
2021 * the wakeup time for this CPU. This means that RCU's timer can be
2022 * delayed until the wakeup time, which defeats the purpose of posting
2023 * a timer.
2024 */
2025int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
2026{
2027 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2028
2029 /* Flag a new idle sojourn to the idle-entry state machine. */
2030 rdtp->idle_first_pass = 1;
2031 /* If no callbacks, RCU doesn't need the CPU. */
2032 if (!rcu_cpu_has_callbacks(cpu)) {
2033 *delta_jiffies = ULONG_MAX;
2034 return 0;
2035 }
2036 if (rdtp->dyntick_holdoff == jiffies) {
2037 /* RCU recently tried and failed, so don't try again. */
2038 *delta_jiffies = 1;
2039 return 1;
2040 }
2041 /* Set up for the possibility that RCU will post a timer. */
2042 if (rcu_cpu_has_nonlazy_callbacks(cpu))
2043 *delta_jiffies = RCU_IDLE_GP_DELAY;
2044 else
2045 *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
2046 return 0;
2047}
2048
2049/*
2043 * Handler for smp_call_function_single(). The only point of this 2050 * Handler for smp_call_function_single(). The only point of this
2044 * handler is to wake the CPU up, so the handler does only tracing. 2051 * handler is to wake the CPU up, so the handler does only tracing.
2045 */ 2052 */
@@ -2075,21 +2082,24 @@ static void rcu_idle_gp_timer_func(unsigned long cpu_in)
2075 */ 2082 */
2076static void rcu_prepare_for_idle_init(int cpu) 2083static void rcu_prepare_for_idle_init(int cpu)
2077{ 2084{
2078 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2085 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2079 setup_timer(&per_cpu(rcu_idle_gp_timer, cpu), 2086
2080 rcu_idle_gp_timer_func, cpu); 2087 rdtp->dyntick_holdoff = jiffies - 1;
2081 per_cpu(rcu_idle_gp_timer_expires, cpu) = jiffies - 1; 2088 setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
2082 per_cpu(rcu_idle_first_pass, cpu) = 1; 2089 rdtp->idle_gp_timer_expires = jiffies - 1;
2090 rdtp->idle_first_pass = 1;
2083} 2091}
2084 2092
2085/* 2093/*
2086 * Clean up for exit from idle. Because we are exiting from idle, there 2094 * Clean up for exit from idle. Because we are exiting from idle, there
2087 * is no longer any point to rcu_idle_gp_timer, so cancel it. This will 2095 * is no longer any point to ->idle_gp_timer, so cancel it. This will
2088 * do nothing if this timer is not active, so just cancel it unconditionally. 2096 * do nothing if this timer is not active, so just cancel it unconditionally.
2089 */ 2097 */
2090static void rcu_cleanup_after_idle(int cpu) 2098static void rcu_cleanup_after_idle(int cpu)
2091{ 2099{
2092 del_timer(&per_cpu(rcu_idle_gp_timer, cpu)); 2100 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2101
2102 del_timer(&rdtp->idle_gp_timer);
2093 trace_rcu_prep_idle("Cleanup after idle"); 2103 trace_rcu_prep_idle("Cleanup after idle");
2094} 2104}
2095 2105
@@ -2108,42 +2118,41 @@ static void rcu_cleanup_after_idle(int cpu)
2108 * Because it is not legal to invoke rcu_process_callbacks() with irqs 2118 * Because it is not legal to invoke rcu_process_callbacks() with irqs
2109 * disabled, we do one pass of force_quiescent_state(), then do a 2119 * disabled, we do one pass of force_quiescent_state(), then do a
2110 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked 2120 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
2111 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. 2121 * later. The ->dyntick_drain field controls the sequencing.
2112 * 2122 *
2113 * The caller must have disabled interrupts. 2123 * The caller must have disabled interrupts.
2114 */ 2124 */
2115static void rcu_prepare_for_idle(int cpu) 2125static void rcu_prepare_for_idle(int cpu)
2116{ 2126{
2117 struct timer_list *tp; 2127 struct timer_list *tp;
2128 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2118 2129
2119 /* 2130 /*
2120 * If this is an idle re-entry, for example, due to use of 2131 * If this is an idle re-entry, for example, due to use of
2121 * RCU_NONIDLE() or the new idle-loop tracing API within the idle 2132 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
2122 * loop, then don't take any state-machine actions, unless the 2133 * loop, then don't take any state-machine actions, unless the
2123 * momentary exit from idle queued additional non-lazy callbacks. 2134 * momentary exit from idle queued additional non-lazy callbacks.
2124 * Instead, repost the rcu_idle_gp_timer if this CPU has callbacks 2135 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
2125 * pending. 2136 * pending.
2126 */ 2137 */
2127 if (!per_cpu(rcu_idle_first_pass, cpu) && 2138 if (!rdtp->idle_first_pass &&
2128 (per_cpu(rcu_nonlazy_posted, cpu) == 2139 (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
2129 per_cpu(rcu_nonlazy_posted_snap, cpu))) {
2130 if (rcu_cpu_has_callbacks(cpu)) { 2140 if (rcu_cpu_has_callbacks(cpu)) {
2131 tp = &per_cpu(rcu_idle_gp_timer, cpu); 2141 tp = &rdtp->idle_gp_timer;
2132 mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); 2142 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
2133 } 2143 }
2134 return; 2144 return;
2135 } 2145 }
2136 per_cpu(rcu_idle_first_pass, cpu) = 0; 2146 rdtp->idle_first_pass = 0;
2137 per_cpu(rcu_nonlazy_posted_snap, cpu) = 2147 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
2138 per_cpu(rcu_nonlazy_posted, cpu) - 1;
2139 2148
2140 /* 2149 /*
2141 * If there are no callbacks on this CPU, enter dyntick-idle mode. 2150 * If there are no callbacks on this CPU, enter dyntick-idle mode.
2142 * Also reset state to avoid prejudicing later attempts. 2151 * Also reset state to avoid prejudicing later attempts.
2143 */ 2152 */
2144 if (!rcu_cpu_has_callbacks(cpu)) { 2153 if (!rcu_cpu_has_callbacks(cpu)) {
2145 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2154 rdtp->dyntick_holdoff = jiffies - 1;
2146 per_cpu(rcu_dyntick_drain, cpu) = 0; 2155 rdtp->dyntick_drain = 0;
2147 trace_rcu_prep_idle("No callbacks"); 2156 trace_rcu_prep_idle("No callbacks");
2148 return; 2157 return;
2149 } 2158 }
@@ -2152,36 +2161,37 @@ static void rcu_prepare_for_idle(int cpu)
2152 * If in holdoff mode, just return. We will presumably have 2161 * If in holdoff mode, just return. We will presumably have
2153 * refrained from disabling the scheduling-clock tick. 2162 * refrained from disabling the scheduling-clock tick.
2154 */ 2163 */
2155 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { 2164 if (rdtp->dyntick_holdoff == jiffies) {
2156 trace_rcu_prep_idle("In holdoff"); 2165 trace_rcu_prep_idle("In holdoff");
2157 return; 2166 return;
2158 } 2167 }
2159 2168
2160 /* Check and update the rcu_dyntick_drain sequencing. */ 2169 /* Check and update the ->dyntick_drain sequencing. */
2161 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2170 if (rdtp->dyntick_drain <= 0) {
2162 /* First time through, initialize the counter. */ 2171 /* First time through, initialize the counter. */
2163 per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; 2172 rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
2164 } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && 2173 } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
2165 !rcu_pending(cpu) && 2174 !rcu_pending(cpu) &&
2166 !local_softirq_pending()) { 2175 !local_softirq_pending()) {
2167 /* Can we go dyntick-idle despite still having callbacks? */ 2176 /* Can we go dyntick-idle despite still having callbacks? */
2168 trace_rcu_prep_idle("Dyntick with callbacks"); 2177 rdtp->dyntick_drain = 0;
2169 per_cpu(rcu_dyntick_drain, cpu) = 0; 2178 rdtp->dyntick_holdoff = jiffies;
2170 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2179 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
2171 if (rcu_cpu_has_nonlazy_callbacks(cpu)) 2180 trace_rcu_prep_idle("Dyntick with callbacks");
2172 per_cpu(rcu_idle_gp_timer_expires, cpu) = 2181 rdtp->idle_gp_timer_expires =
2173 jiffies + RCU_IDLE_GP_DELAY; 2182 jiffies + RCU_IDLE_GP_DELAY;
2174 else 2183 } else {
2175 per_cpu(rcu_idle_gp_timer_expires, cpu) = 2184 rdtp->idle_gp_timer_expires =
2176 jiffies + RCU_IDLE_LAZY_GP_DELAY; 2185 jiffies + RCU_IDLE_LAZY_GP_DELAY;
2177 tp = &per_cpu(rcu_idle_gp_timer, cpu); 2186 trace_rcu_prep_idle("Dyntick with lazy callbacks");
2178 mod_timer_pinned(tp, per_cpu(rcu_idle_gp_timer_expires, cpu)); 2187 }
2179 per_cpu(rcu_nonlazy_posted_snap, cpu) = 2188 tp = &rdtp->idle_gp_timer;
2180 per_cpu(rcu_nonlazy_posted, cpu); 2189 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
2190 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
2181 return; /* Nothing more to do immediately. */ 2191 return; /* Nothing more to do immediately. */
2182 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2192 } else if (--(rdtp->dyntick_drain) <= 0) {
2183 /* We have hit the limit, so time to give up. */ 2193 /* We have hit the limit, so time to give up. */
2184 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2194 rdtp->dyntick_holdoff = jiffies;
2185 trace_rcu_prep_idle("Begin holdoff"); 2195 trace_rcu_prep_idle("Begin holdoff");
2186 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ 2196 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
2187 return; 2197 return;
@@ -2227,7 +2237,7 @@ static void rcu_prepare_for_idle(int cpu)
2227 */ 2237 */
2228static void rcu_idle_count_callbacks_posted(void) 2238static void rcu_idle_count_callbacks_posted(void)
2229{ 2239{
2230 __this_cpu_add(rcu_nonlazy_posted, 1); 2240 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
2231} 2241}
2232 2242
2233#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2243#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
@@ -2238,11 +2248,12 @@ static void rcu_idle_count_callbacks_posted(void)
2238 2248
2239static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 2249static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2240{ 2250{
2241 struct timer_list *tltp = &per_cpu(rcu_idle_gp_timer, cpu); 2251 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
2252 struct timer_list *tltp = &rdtp->idle_gp_timer;
2242 2253
2243 sprintf(cp, "drain=%d %c timer=%lu", 2254 sprintf(cp, "drain=%d %c timer=%lu",
2244 per_cpu(rcu_dyntick_drain, cpu), 2255 rdtp->dyntick_drain,
2245 per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.', 2256 rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
2246 timer_pending(tltp) ? tltp->expires - jiffies : -1); 2257 timer_pending(tltp) ? tltp->expires - jiffies : -1);
2247} 2258}
2248 2259
diff --git a/kernel/relay.c b/kernel/relay.c
index ab56a1764d4d..e8cd2027abbd 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
1235 struct splice_pipe_desc spd = { 1235 struct splice_pipe_desc spd = {
1236 .pages = pages, 1236 .pages = pages,
1237 .nr_pages = 0, 1237 .nr_pages = 0,
1238 .nr_pages_max = PIPE_DEF_BUFFERS,
1238 .partial = partial, 1239 .partial = partial,
1239 .flags = flags, 1240 .flags = flags,
1240 .ops = &relay_pipe_buf_ops, 1241 .ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
1302 ret += padding; 1303 ret += padding;
1303 1304
1304out: 1305out:
1305 splice_shrink_spd(pipe, &spd); 1306 splice_shrink_spd(&spd);
1306 return ret; 1307 return ret;
1307} 1308}
1308 1309
1309static ssize_t relay_file_splice_read(struct file *in, 1310static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/sys.c b/kernel/sys.c
index f0ec44dcd415..e0c8ffc50d7f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2127,9 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2127 else 2127 else
2128 return -EINVAL; 2128 return -EINVAL;
2129 break; 2129 break;
2130 case PR_GET_TID_ADDRESS:
2131 error = prctl_get_tid_address(me, (int __user **)arg2);
2132 break;
2133 default: 2130 default:
2134 return -EINVAL; 2131 return -EINVAL;
2135 } 2132 }
@@ -2147,6 +2144,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2147 case PR_SET_MM: 2144 case PR_SET_MM:
2148 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2145 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2149 break; 2146 break;
2147 case PR_GET_TID_ADDRESS:
2148 error = prctl_get_tid_address(me, (int __user **)arg2);
2149 break;
2150 case PR_SET_CHILD_SUBREAPER: 2150 case PR_SET_CHILD_SUBREAPER:
2151 me->signal->is_child_subreaper = !!arg2; 2151 me->signal->is_child_subreaper = !!arg2;
2152 error = 0; 2152 error = 0;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index da70c6db496c..869997833928 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
274static void tick_nohz_stop_sched_tick(struct tick_sched *ts) 274static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
275{ 275{
276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 276 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
277 unsigned long rcu_delta_jiffies;
277 ktime_t last_update, expires, now; 278 ktime_t last_update, expires, now;
278 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 279 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
279 u64 time_delta; 280 u64 time_delta;
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
322 time_delta = timekeeping_max_deferment(); 323 time_delta = timekeeping_max_deferment();
323 } while (read_seqretry(&xtime_lock, seq)); 324 } while (read_seqretry(&xtime_lock, seq));
324 325
325 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || 326 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
326 arch_needs_cpu(cpu)) { 327 arch_needs_cpu(cpu)) {
327 next_jiffies = last_jiffies + 1; 328 next_jiffies = last_jiffies + 1;
328 delta_jiffies = 1; 329 delta_jiffies = 1;
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
330 /* Get the next timer wheel timer */ 331 /* Get the next timer wheel timer */
331 next_jiffies = get_next_timer_interrupt(last_jiffies); 332 next_jiffies = get_next_timer_interrupt(last_jiffies);
332 delta_jiffies = next_jiffies - last_jiffies; 333 delta_jiffies = next_jiffies - last_jiffies;
334 if (rcu_delta_jiffies < delta_jiffies) {
335 next_jiffies = last_jiffies + rcu_delta_jiffies;
336 delta_jiffies = rcu_delta_jiffies;
337 }
333 } 338 }
334 /* 339 /*
335 * Do not stop the tick, if we are only one off 340 * Do not stop the tick, if we are only one off
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 49249c28690d..a7fa0702be1c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3609 .pages = pages_def, 3609 .pages = pages_def,
3610 .partial = partial_def, 3610 .partial = partial_def,
3611 .nr_pages = 0, /* This gets updated below. */ 3611 .nr_pages = 0, /* This gets updated below. */
3612 .nr_pages_max = PIPE_DEF_BUFFERS,
3612 .flags = flags, 3613 .flags = flags,
3613 .ops = &tracing_pipe_buf_ops, 3614 .ops = &tracing_pipe_buf_ops,
3614 .spd_release = tracing_spd_release_pipe, 3615 .spd_release = tracing_spd_release_pipe,
@@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3680 3681
3681 ret = splice_to_pipe(pipe, &spd); 3682 ret = splice_to_pipe(pipe, &spd);
3682out: 3683out:
3683 splice_shrink_spd(pipe, &spd); 3684 splice_shrink_spd(&spd);
3684 return ret; 3685 return ret;
3685 3686
3686out_err: 3687out_err:
@@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4231 struct splice_pipe_desc spd = { 4232 struct splice_pipe_desc spd = {
4232 .pages = pages_def, 4233 .pages = pages_def,
4233 .partial = partial_def, 4234 .partial = partial_def,
4235 .nr_pages_max = PIPE_DEF_BUFFERS,
4234 .flags = flags, 4236 .flags = flags,
4235 .ops = &buffer_pipe_buf_ops, 4237 .ops = &buffer_pipe_buf_ops,
4236 .spd_release = buffer_spd_release, 4238 .spd_release = buffer_spd_release,
@@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4318 } 4320 }
4319 4321
4320 ret = splice_to_pipe(pipe, &spd); 4322 ret = splice_to_pipe(pipe, &spd);
4321 splice_shrink_spd(pipe, &spd); 4323 splice_shrink_spd(&spd);
4322out: 4324out:
4323 return ret; 4325 return ret;
4324} 4326}