author    Ingo Molnar <mingo@kernel.org>    2012-07-18 05:17:17 -0400
committer Ingo Molnar <mingo@kernel.org>    2012-07-18 05:17:17 -0400
commit    a2fe194723f6e4990d01d8c208c7b138fd410522 (patch)
tree      7aee93fa8f4ba1e18b56fa7d8eab75d249fc6966 /kernel
parent    c3b7cdf180090d2686239a75bb0ae408108ed749 (diff)
parent    a018540141a931f5299a866907b27886916b4374 (diff)
Merge branch 'linus' into perf/core
Pick up the latest ring-buffer fixes, before applying a new fix.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup.c              23
-rw-r--r--   kernel/fork.c                11
-rw-r--r--   kernel/hrtimer.c             53
-rw-r--r--   kernel/printk.c             202
-rw-r--r--   kernel/rcutree.c              1
-rw-r--r--   kernel/rcutree.h              1
-rw-r--r--   kernel/rcutree_plugin.h      14
-rw-r--r--   kernel/sched/core.c         276
-rw-r--r--   kernel/sched/idle_task.c      1
-rw-r--r--   kernel/sched/sched.h          2
-rw-r--r--   kernel/sys.c                 16
-rw-r--r--   kernel/time/tick-sched.c      2
-rw-r--r--   kernel/time/timekeeping.c    64
-rw-r--r--   kernel/trace/ring_buffer.c    6
14 files changed, 472 insertions(+), 200 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2097684cf194..b303dfc7dce0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 	mutex_unlock(&cgroup_mutex);
 
 	/*
-	 * We want to drop the active superblock reference from the
-	 * cgroup creation after all the dentry refs are gone -
-	 * kill_sb gets mighty unhappy otherwise. Mark
-	 * dentry->d_fsdata with cgroup_diput() to tell
-	 * cgroup_d_release() to call deactivate_super().
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
 	 */
-	dentry->d_fsdata = cgroup_diput;
+	deactivate_super(cgrp->root->sb);
 
 	/*
 	 * if we're getting rid of the cgroup, refcount should ensure
@@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
 	return 1;
 }
 
-static void cgroup_d_release(struct dentry *dentry)
-{
-	/* did cgroup_diput() tell me to deactivate super? */
-	if (dentry->d_fsdata == cgroup_diput)
-		deactivate_super(dentry->d_sb);
-}
-
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1547,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
 		.d_delete = cgroup_delete,
-		.d_release = cgroup_d_release,
 	};
 
 	struct inode *inode =
@@ -3894,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, dput_work);
+	struct dentry *dentry = css->cgroup->dentry;
+	struct super_block *sb = dentry->d_sb;
 
-	dput(css->cgroup->dentry);
+	atomic_inc(&sb->s_active);
+	dput(dentry);
+	deactivate_super(sb);
 }
 
 static void init_cgroup_css(struct cgroup_subsys_state *css,
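The css_dput_fn() hunk above works by pinning: it takes an extra superblock reference with atomic_inc(&sb->s_active) before the dput() that may tear the dentry down, and only then releases the pin with deactivate_super(), so kill_sb cannot run underneath the dput. For illustration only, here is the same pin-around-release idiom as a self-contained userspace C sketch; the struct and function names are hypothetical stand-ins, not kernel API:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;		/* plays the role of sb->s_active */
};

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1) {
		printf("last reference gone, freeing\n");
		free(o);
	}
}

/* May drop what is possibly the last reference to 'o'. */
static void drop_dependent_ref(struct obj *o)
{
	obj_put(o);
}

/*
 * The css_dput_fn() pattern: take an extra reference first, so the final
 * teardown runs here, in a context we control, rather than inside
 * drop_dependent_ref().
 */
static void safe_drop(struct obj *o)
{
	atomic_fetch_add(&o->refs, 1);	/* pin, like atomic_inc(&sb->s_active) */
	drop_dependent_ref(o);		/* like dput(dentry) */
	obj_put(o);			/* unpin, like deactivate_super(sb) */
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refs, 1);
	safe_drop(o);
	return 0;
}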
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..f00e319d8376 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	}
 
 	err = arch_dup_task_struct(tsk, orig);
-	if (err)
-		goto out;
 
+	/*
+	 * We defer looking at err, because we will need this setup
+	 * for the clean up path to work correctly.
+	 */
 	tsk->stack = ti;
-
 	setup_thread_stack(tsk, orig);
+
+	if (err)
+		goto out;
+
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
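The reordering above is a general cleanup-path idiom: finish wiring the object (tsk->stack = ti and the stack setup) before acting on a deferred error, so a single teardown routine can free everything. A minimal sketch of that shape, assuming hypothetical helpers in place of arch_dup_task_struct() and free_task():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
	void *stack;
};

static int arch_copy(struct task *t)
{
	(void)t;
	return -ENOMEM;		/* force the error path for the demo */
}

/* The error path frees through the task, so t->stack must be valid. */
static void free_task(struct task *t)
{
	free(t->stack);
	free(t);
}

static struct task *dup_task(void)
{
	struct task *t = calloc(1, sizeof(*t));
	void *stack = malloc(8192);
	int err;

	if (!t || !stack) {
		free(stack);
		free(t);
		return NULL;
	}

	err = arch_copy(t);
	/*
	 * Mirror of the fork.c fix: attach the stack *before* acting on
	 * err, so free_task() below sees a fully set up task.
	 */
	t->stack = stack;
	if (err) {
		free_task(t);
		return NULL;
	}
	return t;
}

int main(void)
{
	printf(dup_task() ? "dup ok\n" : "dup failed, cleaned up\n");
	return 0;
}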
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf51682b..6db7a5ed52b5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 	return 0;
 }
 
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+
+	return ktime_get_update_offsets(offs_real, offs_boot);
+}
+
 /*
  * Retrigger next event is called after clock was set
  *
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 static void retrigger_next_event(void *arg)
 {
 	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
-	struct timespec realtime_offset, xtim, wtm, sleep;
 
 	if (!hrtimer_hres_active())
 		return;
 
-	/* Optimized out for !HIGH_RES */
-	get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
-	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
-
-	/* Adjust CLOCK_REALTIME offset */
 	raw_spin_lock(&base->lock);
-	base->clock_base[HRTIMER_BASE_REALTIME].offset =
-		timespec_to_ktime(realtime_offset);
-	base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
-		timespec_to_ktime(sleep);
-
+	hrtimer_update_base(base);
 	hrtimer_force_reprogram(base, 0);
 	raw_spin_unlock(&base->lock);
 }
@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
 		base->clock_base[i].resolution = KTIME_HIGH_RES;
 
 	tick_setup_sched_timer();
-
 	/* "Retrigger" the interrupt to get things going */
 	retrigger_next_event(NULL);
 	local_irq_restore(flags);
 	return 1;
 }
 
+/*
+ * Called from timekeeping code to reprogram the hrtimer interrupt
+ * device. If called from the timer interrupt context we defer it to
+ * softirq context.
+ */
+void clock_was_set_delayed(void)
+{
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+	cpu_base->clock_was_set = 1;
+	__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
-	entry_time = now = ktime_get();
+	raw_spin_lock(&cpu_base->lock);
+	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
 	expires_next.tv64 = KTIME_MAX;
-
-	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1330,8 +1339,12 @@ retry:
 	 * We need to prevent that we loop forever in the hrtimer
 	 * interrupt routine. We give it 3 attempts to avoid
 	 * overreacting on some spurious event.
+	 *
+	 * Acquire base lock for updating the offsets and retrieving
+	 * the current time.
 	 */
-	now = ktime_get();
+	raw_spin_lock(&cpu_base->lock);
+	now = hrtimer_update_base(cpu_base);
 	cpu_base->nr_retries++;
 	if (++retries < 3)
 		goto retry;
@@ -1343,6 +1356,7 @@ retry:
 	 */
 	cpu_base->nr_hangs++;
 	cpu_base->hang_detected = 1;
+	raw_spin_unlock(&cpu_base->lock);
 	delta = ktime_sub(now, entry_time);
 	if (delta.tv64 > cpu_base->max_hang_time.tv64)
 		cpu_base->max_hang_time = delta;
@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void)
 
 static void run_hrtimer_softirq(struct softirq_action *h)
 {
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+	if (cpu_base->clock_was_set) {
+		cpu_base->clock_was_set = 0;
+		clock_was_set();
+	}
+
 	hrtimer_peek_ahead_timers();
 }
 
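clock_was_set_delayed() defers the expensive clock_was_set() retrigger out of timer-interrupt context: it records a flag in the per-cpu base and raises HRTIMER_SOFTIRQ, and run_hrtimer_softirq() later tests and clears the flag. A toy model of that flag-and-raise deferral, single-threaded and with no real interrupts, purely to show the shape:

#include <stdbool.h>
#include <stdio.h>

/* Per-CPU base in the kernel; a single global here. */
static struct {
	bool clock_was_set;	/* like cpu_base->clock_was_set */
	bool softirq_raised;
} base;

/* Expensive; must not run in (emulated) hard interrupt context. */
static void clock_was_set(void)
{
	puts("retriggering clock events");
}

/* Callable from "interrupt context": just record and raise. */
static void clock_was_set_delayed(void)
{
	base.clock_was_set = true;
	base.softirq_raised = true;	/* __raise_softirq_irqoff() analogue */
}

/* The softirq handler, like run_hrtimer_softirq() in the patch. */
static void run_softirq(void)
{
	if (base.clock_was_set) {
		base.clock_was_set = false;
		clock_was_set();
	}
	/* ... then the usual timer expiry work ... */
}

int main(void)
{
	clock_was_set_delayed();	/* from "irq" code */
	if (base.softirq_raised)
		run_softirq();		/* later, in softirq context */
	return 0;
}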
diff --git a/kernel/printk.c b/kernel/printk.c
index dba18211685e..177fa49357a5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -194,8 +194,10 @@ static int console_may_schedule;
  */
 
 enum log_flags {
-	LOG_DEFAULT = 0,
-	LOG_NOCONS = 1,		/* already flushed, do not print to console */
+	LOG_NOCONS = 1,		/* already flushed, do not print to console */
+	LOG_NEWLINE = 2,	/* text ended with a newline */
+	LOG_PREFIX = 4,		/* text started with a prefix */
+	LOG_CONT = 8,		/* text is a fragment of a continuation line */
 };
 
 struct log {
@@ -217,6 +219,8 @@ static DEFINE_RAW_SPINLOCK(logbuf_lock);
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
 static u32 syslog_idx;
+static enum log_flags syslog_prev;
+static size_t syslog_partial;
 
 /* index and sequence number of the first record stored in the buffer */
 static u64 log_first_seq;
@@ -430,20 +434,20 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 	ret = mutex_lock_interruptible(&user->lock);
 	if (ret)
 		return ret;
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	while (user->seq == log_next_seq) {
 		if (file->f_flags & O_NONBLOCK) {
 			ret = -EAGAIN;
-			raw_spin_unlock(&logbuf_lock);
+			raw_spin_unlock_irq(&logbuf_lock);
 			goto out;
 		}
 
-		raw_spin_unlock(&logbuf_lock);
+		raw_spin_unlock_irq(&logbuf_lock);
 		ret = wait_event_interruptible(log_wait,
 					       user->seq != log_next_seq);
 		if (ret)
 			goto out;
-		raw_spin_lock(&logbuf_lock);
+		raw_spin_lock_irq(&logbuf_lock);
 	}
 
 	if (user->seq < log_first_seq) {
@@ -451,7 +455,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 		user->idx = log_first_idx;
 		user->seq = log_first_seq;
 		ret = -EPIPE;
-		raw_spin_unlock(&logbuf_lock);
+		raw_spin_unlock_irq(&logbuf_lock);
 		goto out;
 	}
 
@@ -465,7 +469,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 	for (i = 0; i < msg->text_len; i++) {
 		unsigned char c = log_text(msg)[i];
 
-		if (c < ' ' || c >= 128)
+		if (c < ' ' || c >= 127 || c == '\\')
 			len += sprintf(user->buf + len, "\\x%02x", c);
 		else
 			user->buf[len++] = c;
@@ -489,7 +493,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 			continue;
 		}
 
-		if (c < ' ' || c >= 128) {
+		if (c < ' ' || c >= 127 || c == '\\') {
 			len += sprintf(user->buf + len, "\\x%02x", c);
 			continue;
 		}
@@ -501,7 +505,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 
 	user->idx = log_next(user->idx);
 	user->seq++;
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	if (len > count) {
 		ret = -EINVAL;
@@ -528,7 +532,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
 	if (offset)
 		return -ESPIPE;
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	switch (whence) {
 	case SEEK_SET:
 		/* the first record */
@@ -552,7 +556,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
 	default:
 		ret = -EINVAL;
 	}
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 	return ret;
 }
 
@@ -566,14 +570,14 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &log_wait, wait);
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	if (user->seq < log_next_seq) {
 		/* return error when data has vanished underneath us */
 		if (user->seq < log_first_seq)
 			ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
 		ret = POLLIN|POLLRDNORM;
 	}
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	return ret;
 }
@@ -597,10 +601,10 @@ static int devkmsg_open(struct inode *inode, struct file *file)
 
 	mutex_init(&user->lock);
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	user->idx = log_first_idx;
 	user->seq = log_first_seq;
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	file->private_data = user;
 	return 0;
@@ -818,15 +822,18 @@ static size_t print_time(u64 ts, char *buf)
 static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
 {
 	size_t len = 0;
+	unsigned int prefix = (msg->facility << 3) | msg->level;
 
 	if (syslog) {
 		if (buf) {
-			len += sprintf(buf, "<%u>", msg->level);
+			len += sprintf(buf, "<%u>", prefix);
 		} else {
 			len += 3;
-			if (msg->level > 9)
-				len++;
-			if (msg->level > 99)
+			if (prefix > 999)
+				len += 3;
+			else if (prefix > 99)
+				len += 2;
+			else if (prefix > 9)
 				len++;
 		}
 	}
@@ -835,13 +842,26 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
 	return len;
 }
 
-static size_t msg_print_text(const struct log *msg, bool syslog,
-			     char *buf, size_t size)
+static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+			     bool syslog, char *buf, size_t size)
 {
 	const char *text = log_text(msg);
 	size_t text_size = msg->text_len;
+	bool prefix = true;
+	bool newline = true;
 	size_t len = 0;
 
+	if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
+		prefix = false;
+
+	if (msg->flags & LOG_CONT) {
+		if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
+			prefix = false;
+
+		if (!(msg->flags & LOG_NEWLINE))
+			newline = false;
+	}
+
 	do {
 		const char *next = memchr(text, '\n', text_size);
 		size_t text_len;
@@ -859,16 +879,22 @@ static size_t msg_print_text(const struct log *msg, bool syslog,
 			    text_len + 1 >= size - len)
 				break;
 
-			len += print_prefix(msg, syslog, buf + len);
+			if (prefix)
+				len += print_prefix(msg, syslog, buf + len);
 			memcpy(buf + len, text, text_len);
 			len += text_len;
-			buf[len++] = '\n';
+			if (next || newline)
+				buf[len++] = '\n';
 		} else {
 			/* SYSLOG_ACTION_* buffer size only calculation */
-			len += print_prefix(msg, syslog, NULL);
-			len += text_len + 1;
+			if (prefix)
+				len += print_prefix(msg, syslog, NULL);
+			len += text_len;
+			if (next || newline)
+				len++;
 		}
 
+		prefix = true;
 		text = next;
 	} while (text);
 
@@ -887,22 +913,35 @@ static int syslog_print(char __user *buf, int size)
 
 	while (size > 0) {
 		size_t n;
+		size_t skip;
 
 		raw_spin_lock_irq(&logbuf_lock);
 		if (syslog_seq < log_first_seq) {
 			/* messages are gone, move to first one */
 			syslog_seq = log_first_seq;
 			syslog_idx = log_first_idx;
+			syslog_prev = 0;
+			syslog_partial = 0;
 		}
 		if (syslog_seq == log_next_seq) {
 			raw_spin_unlock_irq(&logbuf_lock);
 			break;
 		}
+
+		skip = syslog_partial;
 		msg = log_from_idx(syslog_idx);
-		n = msg_print_text(msg, true, text, LOG_LINE_MAX);
-		if (n <= size) {
+		n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX);
+		if (n - syslog_partial <= size) {
+			/* message fits into buffer, move forward */
 			syslog_idx = log_next(syslog_idx);
 			syslog_seq++;
+			syslog_prev = msg->flags;
+			n -= syslog_partial;
+			syslog_partial = 0;
+		} else if (!len) {
+			/* partial read(), remember position */
+			n = size;
+			syslog_partial += n;
 		} else
 			n = 0;
 		raw_spin_unlock_irq(&logbuf_lock);
@@ -910,17 +949,15 @@ static int syslog_print(char __user *buf, int size)
 		if (!n)
 			break;
 
-		len += n;
-		size -= n;
-		buf += n;
-		n = copy_to_user(buf - n, text, n);
-
-		if (n) {
-			len -= n;
+		if (copy_to_user(buf, text + skip, n)) {
 			if (!len)
 				len = -EFAULT;
 			break;
 		}
+
+		len += n;
+		size -= n;
+		buf += n;
 	}
 
 	kfree(text);
@@ -941,6 +978,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		u64 next_seq;
 		u64 seq;
 		u32 idx;
+		enum log_flags prev;
 
 		if (clear_seq < log_first_seq) {
 			/* messages are gone, move to first available one */
@@ -954,10 +992,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		 */
 		seq = clear_seq;
 		idx = clear_idx;
+		prev = 0;
 		while (seq < log_next_seq) {
 			struct log *msg = log_from_idx(idx);
 
-			len += msg_print_text(msg, true, NULL, 0);
+			len += msg_print_text(msg, prev, true, NULL, 0);
 			idx = log_next(idx);
 			seq++;
 		}
@@ -965,10 +1004,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		/* move first record forward until length fits into the buffer */
 		seq = clear_seq;
 		idx = clear_idx;
+		prev = 0;
 		while (len > size && seq < log_next_seq) {
 			struct log *msg = log_from_idx(idx);
 
-			len -= msg_print_text(msg, true, NULL, 0);
+			len -= msg_print_text(msg, prev, true, NULL, 0);
 			idx = log_next(idx);
 			seq++;
 		}
@@ -977,17 +1017,19 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		next_seq = log_next_seq;
 
 		len = 0;
+		prev = 0;
 		while (len >= 0 && seq < next_seq) {
 			struct log *msg = log_from_idx(idx);
 			int textlen;
 
-			textlen = msg_print_text(msg, true, text, LOG_LINE_MAX);
+			textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX);
 			if (textlen < 0) {
 				len = textlen;
 				break;
 			}
 			idx = log_next(idx);
 			seq++;
+			prev = msg->flags;
 
 			raw_spin_unlock_irq(&logbuf_lock);
 			if (copy_to_user(buf + len, text, textlen))
@@ -1000,6 +1042,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 				/* messages are gone, move to next one */
 				seq = log_first_seq;
 				idx = log_first_idx;
+				prev = 0;
 			}
 		}
 	}
@@ -1018,7 +1061,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
 	bool clear = false;
 	static int saved_console_loglevel = -1;
-	static DEFINE_MUTEX(syslog_mutex);
 	int error;
 
 	error = check_syslog_permissions(type, from_file);
@@ -1045,17 +1087,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			error = -EFAULT;
 			goto out;
 		}
-		error = mutex_lock_interruptible(&syslog_mutex);
-		if (error)
-			goto out;
 		error = wait_event_interruptible(log_wait,
 						 syslog_seq != log_next_seq);
-		if (error) {
-			mutex_unlock(&syslog_mutex);
+		if (error)
 			goto out;
-		}
 		error = syslog_print(buf, len);
-		mutex_unlock(&syslog_mutex);
 		break;
 	/* Read/clear last kernel messages */
 	case SYSLOG_ACTION_READ_CLEAR:
@@ -1111,6 +1147,8 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			/* messages are gone, move to first one */
 			syslog_seq = log_first_seq;
 			syslog_idx = log_first_idx;
+			syslog_prev = 0;
+			syslog_partial = 0;
 		}
 		if (from_file) {
 			/*
@@ -1120,19 +1158,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			 */
 			error = log_next_idx - syslog_idx;
 		} else {
-			u64 seq;
-			u32 idx;
+			u64 seq = syslog_seq;
+			u32 idx = syslog_idx;
+			enum log_flags prev = syslog_prev;
 
 			error = 0;
-			seq = syslog_seq;
-			idx = syslog_idx;
 			while (seq < log_next_seq) {
 				struct log *msg = log_from_idx(idx);
 
-				error += msg_print_text(msg, true, NULL, 0);
+				error += msg_print_text(msg, prev, true, NULL, 0);
 				idx = log_next(idx);
 				seq++;
+				prev = msg->flags;
 			}
+			error -= syslog_partial;
 		}
 		raw_spin_unlock_irq(&logbuf_lock);
 		break;
@@ -1400,10 +1439,9 @@ asmlinkage int vprintk_emit(int facility, int level,
 	static char textbuf[LOG_LINE_MAX];
 	char *text = textbuf;
 	size_t text_len;
+	enum log_flags lflags = 0;
 	unsigned long flags;
 	int this_cpu;
-	bool newline = false;
-	bool prefix = false;
 	int printed_len = 0;
 
 	boot_delay_msec();
@@ -1442,7 +1480,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		recursion_bug = 0;
 		printed_len += strlen(recursion_msg);
 		/* emit KERN_CRIT message */
-		log_store(0, 2, LOG_DEFAULT, 0,
+		log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
 			  NULL, 0, recursion_msg, printed_len);
 	}
 
@@ -1455,7 +1493,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	/* mark and strip a trailing newline */
 	if (text_len && text[text_len-1] == '\n') {
 		text_len--;
-		newline = true;
+		lflags |= LOG_NEWLINE;
 	}
 
 	/* strip syslog prefix and extract log level or control flags */
@@ -1465,7 +1503,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		if (level == -1)
 			level = text[1] - '0';
 	case 'd':	/* KERN_DEFAULT */
-		prefix = true;
+		lflags |= LOG_PREFIX;
 	case 'c':	/* KERN_CONT */
 		text += 3;
 		text_len -= 3;
@@ -1475,22 +1513,20 @@ asmlinkage int vprintk_emit(int facility, int level,
 	if (level == -1)
 		level = default_message_loglevel;
 
-	if (dict) {
-		prefix = true;
-		newline = true;
-	}
+	if (dict)
+		lflags |= LOG_PREFIX|LOG_NEWLINE;
 
-	if (!newline) {
+	if (!(lflags & LOG_NEWLINE)) {
 		/*
 		 * Flush the conflicting buffer. An earlier newline was missing,
 		 * or another task also prints continuation lines.
 		 */
-		if (cont.len && (prefix || cont.owner != current))
+		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
			cont_flush();
 
 		/* buffer line if possible, otherwise store it right away */
 		if (!cont_add(facility, level, text, text_len))
-			log_store(facility, level, LOG_DEFAULT, 0,
+			log_store(facility, level, lflags | LOG_CONT, 0,
 				  dict, dictlen, text, text_len);
 	} else {
 		bool stored = false;
@@ -1502,13 +1538,13 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 * flush it out and store this line separately.
 		 */
 		if (cont.len && cont.owner == current) {
-			if (!prefix)
+			if (!(lflags & LOG_PREFIX))
 				stored = cont_add(facility, level, text, text_len);
 			cont_flush();
 		}
 
 		if (!stored)
-			log_store(facility, level, LOG_DEFAULT, 0,
+			log_store(facility, level, lflags, 0,
 				  dict, dictlen, text, text_len);
 	}
 	printed_len += text_len;
@@ -1607,8 +1643,8 @@ static struct cont {
 static struct log *log_from_idx(u32 idx) { return NULL; }
 static u32 log_next(u32 idx) { return 0; }
 static void call_console_drivers(int level, const char *text, size_t len) {}
-static size_t msg_print_text(const struct log *msg, bool syslog,
-			     char *buf, size_t size) { return 0; }
+static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+			     bool syslog, char *buf, size_t size) { return 0; }
 static size_t cont_print_text(char *text, size_t size) { return 0; }
 
 #endif /* CONFIG_PRINTK */
@@ -1884,6 +1920,7 @@ void wake_up_klogd(void)
 /* the next printk record to write to the console */
 static u64 console_seq;
 static u32 console_idx;
+static enum log_flags console_prev;
 
 /**
  * console_unlock - unlock the console system
@@ -1944,6 +1981,7 @@ again:
 			/* messages are gone, move to first one */
 			console_seq = log_first_seq;
 			console_idx = log_first_idx;
+			console_prev = 0;
 		}
 skip:
 		if (console_seq == log_next_seq)
@@ -1957,14 +1995,21 @@ skip:
 			 */
 			console_idx = log_next(console_idx);
 			console_seq++;
+			/*
+			 * We will get here again when we register a new
+			 * CON_PRINTBUFFER console. Clear the flag so we
+			 * will properly dump everything later.
+			 */
+			msg->flags &= ~LOG_NOCONS;
 			goto skip;
 		}
 
 		level = msg->level;
-		len = msg_print_text(msg, false, text, sizeof(text));
-
+		len = msg_print_text(msg, console_prev, false,
+				     text, sizeof(text));
 		console_idx = log_next(console_idx);
 		console_seq++;
+		console_prev = msg->flags;
 		raw_spin_unlock(&logbuf_lock);
 
 		stop_critical_timings();	/* don't trace print latency */
@@ -2227,6 +2272,7 @@ void register_console(struct console *newcon)
 		raw_spin_lock_irqsave(&logbuf_lock, flags);
 		console_seq = syslog_seq;
 		console_idx = syslog_idx;
+		console_prev = syslog_prev;
 		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 		/*
 		 * We're about to replay the log buffer. Only do this to the
@@ -2520,8 +2566,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 	}
 
 	msg = log_from_idx(dumper->cur_idx);
-	l = msg_print_text(msg, syslog,
-			   line, size);
+	l = msg_print_text(msg, 0, syslog, line, size);
 
 	dumper->cur_idx = log_next(dumper->cur_idx);
 	dumper->cur_seq++;
@@ -2561,6 +2606,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	u32 idx;
 	u64 next_seq;
 	u32 next_idx;
+	enum log_flags prev;
 	size_t l = 0;
 	bool ret = false;
 
@@ -2583,23 +2629,27 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	/* calculate length of entire buffer */
 	seq = dumper->cur_seq;
 	idx = dumper->cur_idx;
+	prev = 0;
 	while (seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l += msg_print_text(msg, true, NULL, 0);
+		l += msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	/* move first record forward until length fits into the buffer */
 	seq = dumper->cur_seq;
 	idx = dumper->cur_idx;
+	prev = 0;
 	while (l > size && seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l -= msg_print_text(msg, true, NULL, 0);
+		l -= msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	/* last message in next iteration */
@@ -2607,14 +2657,14 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	next_idx = idx;
 
 	l = 0;
+	prev = 0;
 	while (seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l += msg_print_text(msg, syslog,
-				    buf + l, size - l);
-
+		l += msg_print_text(msg, prev, syslog, buf + l, size - l);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	dumper->next_seq = next_seq;
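Most of the printk.c changes thread a 'prev' flags value through msg_print_text() so that continuation fragments are rendered without duplicate syslog prefixes or spurious newlines. The core decision can be reproduced standalone; this sketch copies the flag logic from the msg_print_text() hunk above, with the LOG_* values as in the patch:

#include <stdbool.h>
#include <stdio.h>

enum log_flags {
	LOG_NOCONS	= 1,
	LOG_NEWLINE	= 2,
	LOG_PREFIX	= 4,
	LOG_CONT	= 8,
};

/*
 * The decision at the top of msg_print_text(): should this record get
 * a prefix, and should it be terminated with a newline?
 */
static void cont_policy(int prev, int flags, bool *prefix, bool *newline)
{
	*prefix = true;
	*newline = true;

	if ((prev & LOG_CONT) && !(flags & LOG_PREFIX))
		*prefix = false;

	if (flags & LOG_CONT) {
		if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
			*prefix = false;
		if (!(flags & LOG_NEWLINE))
			*newline = false;
	}
}

int main(void)
{
	bool prefix, newline;

	/* middle fragment of a continuation line: no prefix, no newline */
	cont_policy(LOG_CONT, LOG_CONT, &prefix, &newline);
	printf("fragment: prefix=%d newline=%d\n", prefix, newline);

	/* a complete ordinary record: both */
	cont_policy(LOG_NEWLINE, LOG_NEWLINE, &prefix, &newline);
	printf("complete: prefix=%d newline=%d\n", prefix, newline);
	return 0;
}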
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 38ecdda3f55f..4b97bba7396e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -201,6 +201,7 @@ void rcu_note_context_switch(int cpu)
 {
 	trace_rcu_utilization("Start context switch");
 	rcu_sched_qs(cpu);
+	rcu_preempt_note_context_switch(cpu);
 	trace_rcu_utilization("End context switch");
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index ea056495783e..19b61ac1079f 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -444,6 +444,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
 long rcu_batches_completed(void);
+static void rcu_preempt_note_context_switch(int cpu);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5271a020887e..3e4899459f3d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
  *
  * Caller must disable preemption.
  */
-void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
@@ -164,7 +164,7 @@ void rcu_preempt_note_context_switch(void)
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
-		rdp = __this_cpu_ptr(rcu_preempt_state.rda);
+		rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
 		rnp = rdp->mynode;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ void rcu_preempt_note_context_switch(void)
 	 * means that we continue to block the current grace period.
 	 */
 	local_irq_save(flags);
-	rcu_preempt_qs(smp_processor_id());
+	rcu_preempt_qs(cpu);
 	local_irq_restore(flags);
 }
 
@@ -1002,6 +1002,14 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
+ * Because preemptible RCU does not exist, we never have to check for
+ * CPUs being in quiescent states.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+}
+
+/*
  * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers.
  */
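The rcutree change is a calling-convention fix: rcu_note_context_switch(cpu) already knows which CPU it runs on (its caller has preemption disabled), so rcu_preempt_note_context_switch() now takes cpu as a parameter and uses per_cpu_ptr(..., cpu) rather than re-deriving the CPU via __this_cpu_ptr()/smp_processor_id(). Schematically, as an illustrative userspace stand-in rather than kernel API:

#include <stdio.h>

#define NCPU 4
static int per_cpu_counter[NCPU];

/*
 * The caller already knows which CPU it is running on, so pass it down
 * instead of re-reading it, keeping caller and callee views consistent.
 */
static void note_context_switch(int cpu)
{
	per_cpu_counter[cpu]++;		/* per_cpu_ptr(..., cpu) analogue */
}

int main(void)
{
	int this_cpu = 1;		/* stand-in for smp_processor_id() */

	note_context_switch(this_cpu);
	printf("cpu1 count = %d\n", per_cpu_counter[1]);
	return 0;
}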
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d5594a4268d4..468bdd44c1ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
 	/* Here we just switch the register state and the stack. */
-	rcu_switch_from(prev);
 	switch_to(prev, next, prev);
 
 	barrier();
@@ -2161,11 +2160,73 @@ unsigned long this_cpu_load(void)
 }
 
 
+/*
+ * Global load-average calculations
+ *
+ * We take a distributed and async approach to calculating the global load-avg
+ * in order to minimize overhead.
+ *
+ * The global load average is an exponentially decaying average of nr_running +
+ * nr_uninterruptible.
+ *
+ * Once every LOAD_FREQ:
+ *
+ *   nr_active = 0;
+ *   for_each_possible_cpu(cpu)
+ *	nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
+ *
+ *   avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
+ *
+ * Due to a number of reasons the above turns into the mess below:
+ *
+ *  - for_each_possible_cpu() is prohibitively expensive on machines with a
+ *    serious number of cpus, therefore we need to take a distributed approach
+ *    to calculating nr_active.
+ *
+ *        \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
+ *                      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
+ *
+ *    So assuming nr_active := 0 when we start out -- true per definition, we
+ *    can simply take per-cpu deltas and fold those into a global accumulate
+ *    to obtain the same result. See calc_load_fold_active().
+ *
+ *    Furthermore, in order to avoid synchronizing all per-cpu delta folding
+ *    across the machine, we assume 10 ticks is sufficient time for every
+ *    cpu to have completed this task.
+ *
+ *    This places an upper-bound on the IRQ-off latency of the machine. Then
+ *    again, being late doesn't lose the delta, just wrecks the sample.
+ *
+ *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
+ *    this would add another cross-cpu cacheline miss and atomic operation
+ *    to the wakeup path. Instead we increment on whatever cpu the task ran
+ *    when it went into uninterruptible state and decrement on whatever cpu
+ *    did the wakeup. This means that only the sum of nr_uninterruptible over
+ *    all cpus yields the correct result.
+ *
+ *  This covers the NO_HZ=n code, for extra head-aches, see the comment below.
+ */
+
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
 unsigned long avenrun[3];
-EXPORT_SYMBOL(avenrun);
+EXPORT_SYMBOL(avenrun); /* should be removed */
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
 
 static long calc_load_fold_active(struct rq *this_rq)
 {
@@ -2182,6 +2243,9 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
@@ -2193,30 +2257,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 
 #ifdef CONFIG_NO_HZ
 /*
- * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
+ * Handle NO_HZ for the global load-average.
+ *
+ * Since the above described distributed algorithm to compute the global
+ * load-average relies on per-cpu sampling from the tick, it is affected by
+ * NO_HZ.
+ *
+ * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * entering NO_HZ state such that we can include this as an 'extra' cpu delta
+ * when we read the global state.
+ *
+ * Obviously reality has to ruin such a delightfully simple scheme:
+ *
+ *  - When we go NO_HZ idle during the window, we can negate our sample
+ *    contribution, causing under-accounting.
+ *
+ *    We avoid this by keeping two idle-delta counters and flipping them
+ *    when the window starts, thus separating old and new NO_HZ load.
+ *
+ *    The only trick is the slight shift in index flip for read vs write.
+ *
+ *        0s            5s            10s           15s
+ *          +10           +10           +10           +10
+ *        |-|-----------|-|-----------|-|-----------|-|
+ *    r:0 0 1           1 0           0 1           1 0
+ *    w:0 1 1           0 0           1 1           0 0
+ *
+ *    This ensures we'll fold the old idle contribution in this window while
+ *    accumulating the new one.
+ *
+ *  - When we wake up from NO_HZ idle during the window, we push up our
+ *    contribution, since we effectively move our sample point to a known
+ *    busy state.
+ *
+ *    This is solved by pushing the window forward, and thus skipping the
+ *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    was in effect at the time the window opened). This also solves the issue
+ *    of having to deal with a cpu having been in NOHZ idle for multiple
+ *    LOAD_FREQ intervals.
  *
  * When making the ILB scale, we should try to pull this in as well.
  */
-static atomic_long_t calc_load_tasks_idle;
+static atomic_long_t calc_load_idle[2];
+static int calc_load_idx;
 
-void calc_load_account_idle(struct rq *this_rq)
+static inline int calc_load_write_idx(void)
 {
+	int idx = calc_load_idx;
+
+	/*
+	 * See calc_global_nohz(), if we observe the new index, we also
+	 * need to observe the new update time.
+	 */
+	smp_rmb();
+
+	/*
+	 * If the folding window started, make sure we start writing in the
+	 * next idle-delta.
+	 */
+	if (!time_before(jiffies, calc_load_update))
+		idx++;
+
+	return idx & 1;
+}
+
+static inline int calc_load_read_idx(void)
+{
+	return calc_load_idx & 1;
+}
+
+void calc_load_enter_idle(void)
+{
+	struct rq *this_rq = this_rq();
 	long delta;
 
+	/*
+	 * We're going into NOHZ mode, if there's any pending delta, fold it
+	 * into the pending idle delta.
+	 */
 	delta = calc_load_fold_active(this_rq);
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks_idle);
+	if (delta) {
+		int idx = calc_load_write_idx();
+		atomic_long_add(delta, &calc_load_idle[idx]);
+	}
 }
 
-static long calc_load_fold_idle(void)
+void calc_load_exit_idle(void)
 {
-	long delta = 0;
+	struct rq *this_rq = this_rq();
+
+	/*
+	 * If we're still before the sample window, we're done.
+	 */
+	if (time_before(jiffies, this_rq->calc_load_update))
+		return;
 
 	/*
-	 * Its got a race, we don't care...
+	 * We woke inside or after the sample window, this means we're already
+	 * accounted through the nohz accounting, so skip the entire deal and
+	 * sync up for the next window.
 	 */
-	if (atomic_long_read(&calc_load_tasks_idle))
-		delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
+	this_rq->calc_load_update = calc_load_update;
+	if (time_before(jiffies, this_rq->calc_load_update + 10))
+		this_rq->calc_load_update += LOAD_FREQ;
+}
+
+static long calc_load_fold_idle(void)
+{
+	int idx = calc_load_read_idx();
+	long delta = 0;
+
+	if (atomic_long_read(&calc_load_idle[idx]))
+		delta = atomic_long_xchg(&calc_load_idle[idx], 0);
 
 	return delta;
 }
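The two idle-delta slots plus calc_load_write_idx()/calc_load_read_idx() implement the r/w index shift drawn in the comment above: once the fold window opens, late idlers write into the slot for the next window while the sampler folds the old one, and calc_global_nohz() then flips the index. A toy model of just that slot arithmetic, single-threaded, with the smp_rmb()/smp_wmb() pairing and the jiffies checks elided:

#include <stdio.h>

/* Two idle-delta slots plus a flip index, as in the patch. */
static long calc_load_idle[2];
static int calc_load_idx;
static int window_open;	/* stands in for !time_before(jiffies, calc_load_update) */

static int calc_load_write_idx(void)
{
	int idx = calc_load_idx;

	/* writers move to the next slot once the fold window opens */
	if (window_open)
		idx++;
	return idx & 1;
}

static int calc_load_read_idx(void)
{
	return calc_load_idx & 1;
}

int main(void)
{
	calc_load_idle[calc_load_write_idx()] += 2;	/* cpu enters idle */
	window_open = 1;				/* window opens */
	calc_load_idle[calc_load_write_idx()] += 1;	/* late idler: next slot */

	/* the sampler folds only the old slot... */
	printf("folded %ld, pending %ld\n",
	       calc_load_idle[calc_load_read_idx()],
	       calc_load_idle[calc_load_write_idx()]);

	calc_load_idx++;	/* ...then flips, exposing the new slot next window */
	return 0;
}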
@@ -2302,66 +2454,39 @@ static void calc_global_nohz(void)
 {
 	long delta, active, n;
 
-	/*
-	 * If we crossed a calc_load_update boundary, make sure to fold
-	 * any pending idle changes, the respective CPUs might have
-	 * missed the tick driven calc_load_account_active() update
-	 * due to NO_HZ.
-	 */
-	delta = calc_load_fold_idle();
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks);
-
-	/*
-	 * It could be the one fold was all it took, we done!
-	 */
-	if (time_before(jiffies, calc_load_update + 10))
-		return;
-
-	/*
-	 * Catch-up, fold however many we are behind still
-	 */
-	delta = jiffies - calc_load_update - 10;
-	n = 1 + (delta / LOAD_FREQ);
+	if (!time_before(jiffies, calc_load_update + 10)) {
+		/*
+		 * Catch-up, fold however many we are behind still
+		 */
+		delta = jiffies - calc_load_update - 10;
+		n = 1 + (delta / LOAD_FREQ);
 
-	active = atomic_long_read(&calc_load_tasks);
-	active = active > 0 ? active * FIXED_1 : 0;
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
 
-	avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
-	avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
-	avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
-	calc_load_update += n * LOAD_FREQ;
-}
-#else
-void calc_load_account_idle(struct rq *this_rq)
-{
-}
+		calc_load_update += n * LOAD_FREQ;
+	}
 
-static inline long calc_load_fold_idle(void)
-{
-	return 0;
+	/*
+	 * Flip the idle index...
+	 *
+	 * Make sure we first write the new time then flip the index, so that
+	 * calc_load_write_idx() will see the new time when it reads the new
+	 * index, this avoids a double flip messing things up.
+	 */
+	smp_wmb();
+	calc_load_idx++;
 }
+#else /* !CONFIG_NO_HZ */
 
-static void calc_global_nohz(void)
-{
-}
-#endif
+static inline long calc_load_fold_idle(void) { return 0; }
+static inline void calc_global_nohz(void) { }
 
-/**
- * get_avenrun - get the load average array
- * @loads:	pointer to dest load array
- * @offset:	offset to add
- * @shift:	shift count to shift the result left
- *
- * These values are estimates at best, so no need for locking.
- */
-void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
-{
-	loads[0] = (avenrun[0] + offset) << shift;
-	loads[1] = (avenrun[1] + offset) << shift;
-	loads[2] = (avenrun[2] + offset) << shift;
-}
+#endif /* CONFIG_NO_HZ */
 
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
@@ -2369,11 +2494,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
  */
 void calc_global_load(unsigned long ticks)
 {
-	long active;
+	long active, delta;
 
 	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
+	/*
+	 * Fold the 'old' idle-delta to include all NO_HZ cpus.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
 	active = atomic_long_read(&calc_load_tasks);
 	active = active > 0 ? active * FIXED_1 : 0;
 
@@ -2384,12 +2516,7 @@ void calc_global_load(unsigned long ticks)
 	calc_load_update += LOAD_FREQ;
 
 	/*
-	 * Account one period with whatever state we found before
-	 * folding in the nohz state and ageing the entire idle period.
-	 *
-	 * This avoids loosing a sample when we go idle between
-	 * calc_load_account_active() (10 ticks ago) and now and thus
-	 * under-accounting.
+	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
 	 */
 	calc_global_nohz();
 }
@@ -2406,7 +2533,6 @@ static void calc_load_account_active(struct rq *this_rq)
 		return;
 
 	delta = calc_load_fold_active(this_rq);
-	delta += calc_load_fold_idle();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
 
@@ -2414,6 +2540,10 @@ static void calc_load_account_active(struct rq *this_rq)
 }
 
 /*
+ * End of global load-average stuff
+ */
+
+/*
  * The exact cpuload at various idx values, calculated at every tick would be
  * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
  *
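The fixed-point update calc_load() performs is the one noted in the hunks above as a1 = a0 * e + a * (1 - e), with e an inverse exponential expressed in FIXED_1 units. A standalone demo using the kernel's published FSHIFT/FIXED_1/EXP_1 constants; rounding refinements added in later kernels are deliberately omitted:

#include <stdio.h>

/* Fixed-point constants as in include/linux/sched.h */
#define FSHIFT	11
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884		/* 1/exp(5sec/1min) in FIXED_1 units */

/* The a1 = a0 * e + a * (1 - e) step, as in calc_load() above. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avenrun0 = 0;
	unsigned long active = 2 * FIXED_1;	/* 2 runnable tasks */
	int i;

	/* ten 5-second windows with 2 tasks runnable */
	for (i = 0; i < 10; i++)
		avenrun0 = calc_load(avenrun0, EXP_1, active);

	printf("load1 ~= %lu.%02lu\n", avenrun0 >> FSHIFT,
	       ((avenrun0 & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}

Run repeatedly, the average converges toward 2.00, which is exactly the decaying-average behaviour the big comment block in this file describes.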
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index b44d604b35d1..b6baf370cae9 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
 static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	schedstat_inc(rq, sched_goidle);
-	calc_load_account_idle(rq);
 	return rq->idle;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6d52cea7f33d..55844f24435a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void)
 	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
 }
 
-void calc_load_account_idle(struct rq *this_rq);
-
 #ifdef CONFIG_SCHED_HRTICK
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index e0c8ffc50d7f..2d39a84cd857 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1788,7 +1788,6 @@ SYSCALL_DEFINE1(umask, int, mask)
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
-	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
@@ -1816,13 +1815,17 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	down_write(&mm->mmap_sem);
 
 	/*
-	 * Forbid mm->exe_file change if there are mapped other files.
+	 * Forbid mm->exe_file change if old file still mapped.
 	 */
 	err = -EBUSY;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
-						&exe_file->f_path))
-			goto exit_unlock;
+	if (mm->exe_file) {
+		struct vm_area_struct *vma;
+
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if (vma->vm_file &&
+			    path_equal(&vma->vm_file->f_path,
+				       &mm->exe_file->f_path))
+				goto exit_unlock;
 	}
 
 	/*
@@ -1835,6 +1838,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
 		goto exit_unlock;
 
+	err = 0;
 	set_mm_exe_file(mm, exe_file);
 exit_unlock:
 	up_write(&mm->mmap_sem);
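The new prctl_set_mm_exe_file() test only refuses with -EBUSY while a mapping of the old exe file remains, instead of refusing whenever any other file is mapped. The shape of that check, as a self-contained sketch over a toy vma list; the names are illustrative, not kernel API:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct vma {
	const char *file;	/* backing file path, NULL if anonymous */
	struct vma *next;
};

/*
 * Refuse only while a mapping of the *old* exe file remains, mirroring
 * the patched loop above.
 */
static int may_change_exe(const struct vma *mmap, const char *old_exe)
{
	const struct vma *v;

	if (!old_exe)
		return 0;
	for (v = mmap; v; v = v->next)
		if (v->file && strcmp(v->file, old_exe) == 0)
			return -EBUSY;
	return 0;
}

int main(void)
{
	struct vma lib = { "/lib/libc.so", NULL };
	struct vma exe = { "/bin/old", &lib };

	printf("%d\n", may_change_exe(&exe, "/bin/old"));	/* -EBUSY */
	printf("%d\n", may_change_exe(&lib, "/bin/old"));	/* 0 */
	return 0;
}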
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 869997833928..4a08472c3ca7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -406,6 +406,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
406 */ 406 */
407 if (!ts->tick_stopped) { 407 if (!ts->tick_stopped) {
408 select_nohz_load_balancer(1); 408 select_nohz_load_balancer(1);
409 calc_load_enter_idle();
409 410
410 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); 411 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
411 ts->tick_stopped = 1; 412 ts->tick_stopped = 1;
@@ -597,6 +598,7 @@ void tick_nohz_idle_exit(void)
597 account_idle_ticks(ticks); 598 account_idle_ticks(ticks);
598#endif 599#endif
599 600
601 calc_load_exit_idle();
600 touch_softlockup_watchdog(); 602 touch_softlockup_watchdog();
601 /* 603 /*
602 * Cancel the scheduled timer and restore the tick 604 * Cancel the scheduled timer and restore the tick
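
These two tick-sched.c hunks pair up with the idle_task.c removal earlier in this diff: idle load accounting moves out of pick_next_task_idle() and into the NO_HZ path, via calc_load_enter_idle() when the tick stops and calc_load_exit_idle() when it restarts. The following is a loose conceptual model of that pairing only; the names are illustrative and none of the kernel's actual window-folding logic is reproduced:

	#include <stdio.h>

	static long calc_load_tasks;	/* sampled global task count */
	static long idle_delta;		/* contribution parked by tickless CPUs */

	static void enter_idle(long contrib)
	{
		/* Park this CPU's contribution when its tick stops, so the
		 * periodic sample does not misread a tickless CPU. */
		idle_delta += contrib;
		calc_load_tasks -= contrib;
	}

	static void exit_idle(long contrib)
	{
		/* Rejoin the sampling window before the next tick fires. */
		idle_delta -= contrib;
		calc_load_tasks += contrib;
	}

	int main(void)
	{
		calc_load_tasks = 8;
		enter_idle(3);
		printf("while tickless: %ld\n", calc_load_tasks);
		exit_idle(3);
		printf("after exit:     %ld\n", calc_load_tasks);
		return 0;
	}
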
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6f46a00a1e8a..3447cfaf11e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -70,6 +70,12 @@ struct timekeeper {
70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ 70 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
71 struct timespec raw_time; 71 struct timespec raw_time;
72 72
73 /* Offset clock monotonic -> clock realtime */
74 ktime_t offs_real;
75
76 /* Offset clock monotonic -> clock boottime */
77 ktime_t offs_boot;
78
73 /* Seqlock for all timekeeper values */ 79 /* Seqlock for all timekeeper values */
74 seqlock_t lock; 80 seqlock_t lock;
75}; 81};
@@ -172,6 +178,14 @@ static inline s64 timekeeping_get_ns_raw(void)
172 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 178 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
173} 179}
174 180
181static void update_rt_offset(void)
182{
183 struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic;
184
185 set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
186 timekeeper.offs_real = timespec_to_ktime(tmp);
187}
188
175/* must hold write on timekeeper.lock */ 189/* must hold write on timekeeper.lock */
176static void timekeeping_update(bool clearntp) 190static void timekeeping_update(bool clearntp)
177{ 191{
@@ -179,6 +193,7 @@ static void timekeeping_update(bool clearntp)
179 timekeeper.ntp_error = 0; 193 timekeeper.ntp_error = 0;
180 ntp_clear(); 194 ntp_clear();
181 } 195 }
196 update_rt_offset();
182 update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, 197 update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic,
183 timekeeper.clock, timekeeper.mult); 198 timekeeper.clock, timekeeper.mult);
184} 199}
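
update_rt_offset() above is the whole trick behind offs_real: wall_to_monotonic converts CLOCK_REALTIME to CLOCK_MONOTONIC, so its negation converts back. A tiny worked sketch of that identity, using plain seconds rather than normalized timespecs:

	#include <stdio.h>

	int main(void)
	{
		/* Say boot happened at realtime t=1000s, so monotonic
		 * readings run 1000s behind: wall_to_monotonic = -1000. */
		long wall_to_mono = -1000;
		long offs_real = -wall_to_mono;	/* monotonic -> realtime */
		long monotonic = 250;

		printf("realtime = %ld + %ld = %ld\n",
		       monotonic, offs_real, monotonic + offs_real);
		return 0;
	}
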
@@ -604,6 +619,7 @@ void __init timekeeping_init(void)
604 } 619 }
605 set_normalized_timespec(&timekeeper.wall_to_monotonic, 620 set_normalized_timespec(&timekeeper.wall_to_monotonic,
606 -boot.tv_sec, -boot.tv_nsec); 621 -boot.tv_sec, -boot.tv_nsec);
622 update_rt_offset();
607 timekeeper.total_sleep_time.tv_sec = 0; 623 timekeeper.total_sleep_time.tv_sec = 0;
608 timekeeper.total_sleep_time.tv_nsec = 0; 624 timekeeper.total_sleep_time.tv_nsec = 0;
609 write_sequnlock_irqrestore(&timekeeper.lock, flags); 625 write_sequnlock_irqrestore(&timekeeper.lock, flags);
@@ -612,6 +628,12 @@ void __init timekeeping_init(void)
612/* time in seconds when suspend began */ 628/* time in seconds when suspend began */
613static struct timespec timekeeping_suspend_time; 629static struct timespec timekeeping_suspend_time;
614 630
631static void update_sleep_time(struct timespec t)
632{
633 timekeeper.total_sleep_time = t;
634 timekeeper.offs_boot = timespec_to_ktime(t);
635}
636
615/** 637/**
616 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 638 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
617 * @delta: pointer to a timespec delta value 639 * @delta: pointer to a timespec delta value
@@ -630,8 +652,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta)
630 timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); 652 timekeeper.xtime = timespec_add(timekeeper.xtime, *delta);
631 timekeeper.wall_to_monotonic = 653 timekeeper.wall_to_monotonic =
632 timespec_sub(timekeeper.wall_to_monotonic, *delta); 654 timespec_sub(timekeeper.wall_to_monotonic, *delta);
633 timekeeper.total_sleep_time = timespec_add( 655 update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta));
634 timekeeper.total_sleep_time, *delta);
635} 656}
636 657
637 658
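
The sleep-time hunks keep three quantities in step: after a suspend of delta seconds, CLOCK_REALTIME jumps forward by delta, CLOCK_MONOTONIC does not move, and total_sleep_time (now mirrored into offs_boot by the new update_sleep_time() helper) grows by delta so boottime advances. A worked sketch with plain integers standing in for timespecs:

	#include <stdio.h>

	int main(void)
	{
		long xtime = 5000, wall_to_mono = -4000, sleep_total = 10;
		long delta = 60;	/* seconds spent suspended */

		xtime += delta;		/* realtime catches up */
		wall_to_mono -= delta;	/* keeps monotonic = xtime + wall_to_mono */
		sleep_total += delta;	/* feeds offs_boot */

		printf("monotonic stays %ld, boottime is now %ld\n",
		       xtime + wall_to_mono,
		       xtime + wall_to_mono + sleep_total);
		return 0;
	}
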
@@ -696,6 +717,7 @@ static void timekeeping_resume(void)
696 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 717 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
697 timekeeper.ntp_error = 0; 718 timekeeper.ntp_error = 0;
698 timekeeping_suspended = 0; 719 timekeeping_suspended = 0;
720 timekeeping_update(false);
699 write_sequnlock_irqrestore(&timekeeper.lock, flags); 721 write_sequnlock_irqrestore(&timekeeper.lock, flags);
700 722
701 touch_softlockup_watchdog(); 723 touch_softlockup_watchdog();
@@ -963,6 +985,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
963 leap = second_overflow(timekeeper.xtime.tv_sec); 985 leap = second_overflow(timekeeper.xtime.tv_sec);
964 timekeeper.xtime.tv_sec += leap; 986 timekeeper.xtime.tv_sec += leap;
965 timekeeper.wall_to_monotonic.tv_sec -= leap; 987 timekeeper.wall_to_monotonic.tv_sec -= leap;
988 if (leap)
989 clock_was_set_delayed();
966 } 990 }
967 991
968 /* Accumulate raw time */ 992 /* Accumulate raw time */
@@ -1079,6 +1103,8 @@ static void update_wall_time(void)
1079 leap = second_overflow(timekeeper.xtime.tv_sec); 1103 leap = second_overflow(timekeeper.xtime.tv_sec);
1080 timekeeper.xtime.tv_sec += leap; 1104 timekeeper.xtime.tv_sec += leap;
1081 timekeeper.wall_to_monotonic.tv_sec -= leap; 1105 timekeeper.wall_to_monotonic.tv_sec -= leap;
1106 if (leap)
1107 clock_was_set_delayed();
1082 } 1108 }
1083 1109
1084 timekeeping_update(false); 1110 timekeeping_update(false);
@@ -1246,6 +1272,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1246 } while (read_seqretry(&timekeeper.lock, seq)); 1272 } while (read_seqretry(&timekeeper.lock, seq));
1247} 1273}
1248 1274
1275#ifdef CONFIG_HIGH_RES_TIMERS
1276/**
1277 * ktime_get_update_offsets - hrtimer helper
1278 * @offs_real: pointer to storage for monotonic -> realtime offset
1279 * @offs_boot: pointer to storage for monotonic -> boottime offset
1280 *
1281 * Returns current monotonic time and updates the offsets
1282 * Called from hrtimer_interrupt() or retrigger_next_event()
1283 */
1284ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1285{
1286 ktime_t now;
1287 unsigned int seq;
1288 u64 secs, nsecs;
1289
1290 do {
1291 seq = read_seqbegin(&timekeeper.lock);
1292
1293 secs = timekeeper.xtime.tv_sec;
1294 nsecs = timekeeper.xtime.tv_nsec;
1295 nsecs += timekeeping_get_ns();
1296 /* If arch requires, add in gettimeoffset() */
1297 nsecs += arch_gettimeoffset();
1298
1299 *offs_real = timekeeper.offs_real;
1300 *offs_boot = timekeeper.offs_boot;
1301 } while (read_seqretry(&timekeeper.lock, seq));
1302
1303 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1304 now = ktime_sub(now, *offs_real);
1305 return now;
1306}
1307#endif
1308
1249/** 1309/**
1250 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format 1310 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
1251 */ 1311 */
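
ktime_get_update_offsets() above relies on the standard seqlock reader pattern: snapshot the time and both offsets inside one retry loop, so an interleaved writer can never leave them mutually inconsistent. Below is a single-threaded userspace model of that pattern; the names are illustrative, and the memory barriers implied by the kernel's read_seqbegin()/read_seqretry() are omitted for brevity:

	#include <stdio.h>

	struct snapshot { unsigned seq; long now, offs_real, offs_boot; };
	static struct snapshot shared = { 0, 1000, 300, 40 };

	static unsigned read_begin(void)  { return shared.seq; }
	/* retry if a writer held the lock (odd seq) or bumped it */
	static int read_retry(unsigned s) { return (s & 1) || s != shared.seq; }

	int main(void)
	{
		unsigned seq;
		long now, offs_real, offs_boot;

		do {	/* retry until no writer interleaved */
			seq = read_begin();
			now = shared.now;
			offs_real = shared.offs_real;
			offs_boot = shared.offs_boot;
		} while (read_retry(seq));

		printf("mono=%ld real=%ld boot=%ld\n",
		       now, now + offs_real, now + offs_boot);
		return 0;
	}
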
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 82a3e0c56b1d..49491fa7daa2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1075,6 +1075,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1075 rb_init_page(bpage->page); 1075 rb_init_page(bpage->page);
1076 1076
1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1078 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1078 1079
1079 ret = rb_allocate_pages(cpu_buffer, nr_pages); 1080 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1080 if (ret < 0) 1081 if (ret < 0)
@@ -1346,10 +1347,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1346 * If something was added to this page, it was full 1347 * If something was added to this page, it was full
1347 * since it is not the tail page. So we deduct the 1348 * since it is not the tail page. So we deduct the
1348 * bytes consumed in ring buffer from here. 1349 * bytes consumed in ring buffer from here.
1349 * No need to update overruns, since this page is 1350 * Increment overrun to account for the lost events.
1350 * deleted from ring buffer and its entries are
1351 * already accounted for.
1352 */ 1351 */
1352 local_add(page_entries, &cpu_buffer->overrun);
1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); 1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1354 } 1354 }
1355 1355
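
The final ring-buffer hunk changes the bookkeeping when a still-full page is removed while shrinking the buffer: its events are gone as far as readers are concerned, so they are now added to the overrun count rather than vanishing silently. A toy model of that accounting, with illustrative names only:

	#include <stdio.h>

	int main(void)
	{
		long entries = 100;	/* events currently in the buffer */
		long overrun = 0;	/* events lost to readers */
		long page_entries = 12;	/* events on the page being removed */

		entries -= page_entries;
		overrun += page_entries; /* the fix: account the loss */

		printf("readable=%ld lost=%ld total_seen=%ld\n",
		       entries, overrun, entries + overrun);
		return 0;
	}
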