author     Ingo Molnar <mingo@kernel.org>   2012-07-18 05:17:17 -0400
committer  Ingo Molnar <mingo@kernel.org>   2012-07-18 05:17:17 -0400
commit     a2fe194723f6e4990d01d8c208c7b138fd410522 (patch)
tree       7aee93fa8f4ba1e18b56fa7d8eab75d249fc6966 /kernel
parent     c3b7cdf180090d2686239a75bb0ae408108ed749 (diff)
parent     a018540141a931f5299a866907b27886916b4374 (diff)
Merge branch 'linus' into perf/core
Pick up the latest ring-buffer fixes, before applying a new fix.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c             |  23
-rw-r--r--  kernel/fork.c               |  11
-rw-r--r--  kernel/hrtimer.c            |  53
-rw-r--r--  kernel/printk.c             | 202
-rw-r--r--  kernel/rcutree.c            |   1
-rw-r--r--  kernel/rcutree.h            |   1
-rw-r--r--  kernel/rcutree_plugin.h     |  14
-rw-r--r--  kernel/sched/core.c         | 276
-rw-r--r--  kernel/sched/idle_task.c    |   1
-rw-r--r--  kernel/sched/sched.h        |   2
-rw-r--r--  kernel/sys.c                |  16
-rw-r--r--  kernel/time/tick-sched.c    |   2
-rw-r--r--  kernel/time/timekeeping.c   |  64
-rw-r--r--  kernel/trace/ring_buffer.c  |   6
14 files changed, 472 insertions(+), 200 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2097684cf194..b303dfc7dce0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -901,13 +901,10 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		mutex_unlock(&cgroup_mutex);
 
 		/*
-		 * We want to drop the active superblock reference from the
-		 * cgroup creation after all the dentry refs are gone -
-		 * kill_sb gets mighty unhappy otherwise. Mark
-		 * dentry->d_fsdata with cgroup_diput() to tell
-		 * cgroup_d_release() to call deactivate_super().
+		 * Drop the active superblock reference that we took when we
+		 * created the cgroup
 		 */
-		dentry->d_fsdata = cgroup_diput;
+		deactivate_super(cgrp->root->sb);
 
 		/*
 		 * if we're getting rid of the cgroup, refcount should ensure
@@ -933,13 +930,6 @@ static int cgroup_delete(const struct dentry *d)
 	return 1;
 }
 
-static void cgroup_d_release(struct dentry *dentry)
-{
-	/* did cgroup_diput() tell me to deactivate super? */
-	if (dentry->d_fsdata == cgroup_diput)
-		deactivate_super(dentry->d_sb);
-}
-
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1547,7 +1537,6 @@ static int cgroup_get_rootdir(struct super_block *sb)
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
 		.d_delete = cgroup_delete,
-		.d_release = cgroup_d_release,
 	};
 
 	struct inode *inode =
@@ -3894,8 +3883,12 @@ static void css_dput_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, dput_work);
+	struct dentry *dentry = css->cgroup->dentry;
+	struct super_block *sb = dentry->d_sb;
 
-	dput(css->cgroup->dentry);
+	atomic_inc(&sb->s_active);
+	dput(dentry);
+	deactivate_super(sb);
 }
 
 static void init_cgroup_css(struct cgroup_subsys_state *css,
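
The cgroup.c change above replaces the old d_fsdata/cgroup_d_release handshake with a plain pin/unpin of the superblock's active reference around the final dput(). For illustration only, the reference dance can be modelled in a few lines of userspace C; the types and bodies below are toy stand-ins, not the kernel's definitions:

    #include <stdio.h>

    /* Toy refcounts; only the pin/dput/unpin ordering mirrors the diff. */
    struct sb { int s_active; };
    struct dentry { int count; struct sb *sb; };

    static void deactivate_super(struct sb *sb)
    {
            if (--sb->s_active == 0)
                    printf("kill_sb: superblock torn down\n");
    }

    static void dput(struct dentry *d)
    {
            if (--d->count == 0)
                    deactivate_super(d->sb);    /* cgroup_diput's drop */
    }

    static void css_dput_fn(struct dentry *dentry)
    {
            struct sb *sb = dentry->sb;

            sb->s_active++;         /* pin: kill_sb cannot run yet */
            dput(dentry);           /* possibly the final dentry ref */
            deactivate_super(sb);   /* unpin: teardown may happen now */
    }

    int main(void)
    {
            struct sb sb = { .s_active = 1 };
            struct dentry d = { .count = 1, .sb = &sb };
            css_dput_fn(&d);
            return 0;
    }

The point of the pin is that teardown is forced to happen at the worker's own deactivate_super(), a context where sleeping is safe, rather than inside the dput().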
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..f00e319d8376 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	}
 
 	err = arch_dup_task_struct(tsk, orig);
-	if (err)
-		goto out;
 
+	/*
+	 * We defer looking at err, because we will need this setup
+	 * for the clean up path to work correctly.
+	 */
 	tsk->stack = ti;
-
 	setup_thread_stack(tsk, orig);
+
+	if (err)
+		goto out;
+
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
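
The fork.c change defers acting on the arch_dup_task_struct() error until tsk->stack has been published, so the error path can unwind through the normal cleanup. A toy userspace sketch of the same ordering idea, with made-up names and sizes:

    #include <stdio.h>
    #include <stdlib.h>

    struct task { void *stack; };

    /* Set up the state the cleanup path needs (tsk->stack) before
     * looking at the deferred error, mirroring the reordering above. */
    static struct task *dup_task(int err)
    {
            struct task *tsk = malloc(sizeof(*tsk));
            void *ti = malloc(8192);

            if (!tsk || !ti)
                    goto fail;

            tsk->stack = ti;        /* publish first ... */
            if (err)                /* ... then check the deferred error */
                    goto fail_free;

            return tsk;

    fail_free:
            free(tsk->stack);       /* cleanup relies on tsk->stack */
            free(tsk);
            return NULL;
    fail:
            free(ti);
            free(tsk);
            return NULL;
    }

    int main(void)
    {
            printf("%p\n", (void *)dup_task(0));
            printf("%p\n", (void *)dup_task(1));
            return 0;
    }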
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ae34bf51682b..6db7a5ed52b5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -657,6 +657,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 	return 0;
 }
 
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
+	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+
+	return ktime_get_update_offsets(offs_real, offs_boot);
+}
+
 /*
  * Retrigger next event is called after clock was set
  *
@@ -665,22 +673,12 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 static void retrigger_next_event(void *arg)
 {
 	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
-	struct timespec realtime_offset, xtim, wtm, sleep;
 
 	if (!hrtimer_hres_active())
 		return;
 
-	/* Optimized out for !HIGH_RES */
-	get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
-	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
-
-	/* Adjust CLOCK_REALTIME offset */
 	raw_spin_lock(&base->lock);
-	base->clock_base[HRTIMER_BASE_REALTIME].offset =
-		timespec_to_ktime(realtime_offset);
-	base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
-		timespec_to_ktime(sleep);
-
+	hrtimer_update_base(base);
 	hrtimer_force_reprogram(base, 0);
 	raw_spin_unlock(&base->lock);
 }
@@ -710,13 +708,25 @@ static int hrtimer_switch_to_hres(void)
 		base->clock_base[i].resolution = KTIME_HIGH_RES;
 
 	tick_setup_sched_timer();
-
 	/* "Retrigger" the interrupt to get things going */
 	retrigger_next_event(NULL);
 	local_irq_restore(flags);
 	return 1;
 }
 
+/*
+ * Called from timekeeping code to reprogramm the hrtimer interrupt
+ * device. If called from the timer interrupt context we defer it to
+ * softirq context.
+ */
+void clock_was_set_delayed(void)
+{
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+	cpu_base->clock_was_set = 1;
+	__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -1250,11 +1260,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
-	entry_time = now = ktime_get();
+	raw_spin_lock(&cpu_base->lock);
+	entry_time = now = hrtimer_update_base(cpu_base);
 retry:
 	expires_next.tv64 = KTIME_MAX;
-
-	raw_spin_lock(&cpu_base->lock);
 	/*
 	 * We set expires_next to KTIME_MAX here with cpu_base->lock
 	 * held to prevent that a timer is enqueued in our queue via
@@ -1330,8 +1339,12 @@ retry:
 	 * We need to prevent that we loop forever in the hrtimer
 	 * interrupt routine. We give it 3 attempts to avoid
 	 * overreacting on some spurious event.
+	 *
+	 * Acquire base lock for updating the offsets and retrieving
+	 * the current time.
 	 */
-	now = ktime_get();
+	raw_spin_lock(&cpu_base->lock);
+	now = hrtimer_update_base(cpu_base);
 	cpu_base->nr_retries++;
 	if (++retries < 3)
 		goto retry;
@@ -1343,6 +1356,7 @@ retry:
 	 */
 	cpu_base->nr_hangs++;
 	cpu_base->hang_detected = 1;
+	raw_spin_unlock(&cpu_base->lock);
 	delta = ktime_sub(now, entry_time);
 	if (delta.tv64 > cpu_base->max_hang_time.tv64)
 		cpu_base->max_hang_time = delta;
@@ -1395,6 +1409,13 @@ void hrtimer_peek_ahead_timers(void)
 
 static void run_hrtimer_softirq(struct softirq_action *h)
 {
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+	if (cpu_base->clock_was_set) {
+		cpu_base->clock_was_set = 0;
+		clock_was_set();
+	}
+
 	hrtimer_peek_ahead_timers();
 }
 
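
The new clock_was_set_delayed() is a classic deferral pattern: hard-irq context only records that work is needed and raises a softirq, and run_hrtimer_softirq() later performs the expensive clock_was_set() processing in a context where it is safe. A toy single-threaded C model of that flag handoff, with the kernel machinery reduced to printfs:

    #include <stdbool.h>
    #include <stdio.h>

    static bool clock_was_set_flag;

    static void clock_was_set(void)
    {
            printf("reprogramming hrtimer devices on all CPUs\n");
    }

    /* Called from (the analogue of) hard interrupt context: cheap. */
    static void clock_was_set_delayed(void)
    {
            clock_was_set_flag = true;
            /* the kernel would __raise_softirq_irqoff(HRTIMER_SOFTIRQ) here */
    }

    /* Called from (the analogue of) softirq context: heavy work allowed. */
    static void run_hrtimer_softirq(void)
    {
            if (clock_was_set_flag) {
                    clock_was_set_flag = false;
                    clock_was_set();
            }
    }

    int main(void)
    {
            clock_was_set_delayed();
            run_hrtimer_softirq();
            return 0;
    }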
diff --git a/kernel/printk.c b/kernel/printk.c
index dba18211685e..177fa49357a5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -194,8 +194,10 @@ static int console_may_schedule;
  */
 
 enum log_flags {
-	LOG_DEFAULT = 0,
-	LOG_NOCONS = 1,		/* already flushed, do not print to console */
+	LOG_NOCONS = 1,		/* already flushed, do not print to console */
+	LOG_NEWLINE = 2,	/* text ended with a newline */
+	LOG_PREFIX = 4,		/* text started with a prefix */
+	LOG_CONT = 8,		/* text is a fragment of a continuation line */
 };
 
 struct log {
@@ -217,6 +219,8 @@ static DEFINE_RAW_SPINLOCK(logbuf_lock);
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
 static u32 syslog_idx;
+static enum log_flags syslog_prev;
+static size_t syslog_partial;
 
 /* index and sequence number of the first record stored in the buffer */
 static u64 log_first_seq;
@@ -430,20 +434,20 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 	ret = mutex_lock_interruptible(&user->lock);
 	if (ret)
 		return ret;
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	while (user->seq == log_next_seq) {
 		if (file->f_flags & O_NONBLOCK) {
 			ret = -EAGAIN;
-			raw_spin_unlock(&logbuf_lock);
+			raw_spin_unlock_irq(&logbuf_lock);
 			goto out;
 		}
 
-		raw_spin_unlock(&logbuf_lock);
+		raw_spin_unlock_irq(&logbuf_lock);
 		ret = wait_event_interruptible(log_wait,
 					       user->seq != log_next_seq);
 		if (ret)
 			goto out;
-		raw_spin_lock(&logbuf_lock);
+		raw_spin_lock_irq(&logbuf_lock);
 	}
 
 	if (user->seq < log_first_seq) {
@@ -451,7 +455,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 		user->idx = log_first_idx;
 		user->seq = log_first_seq;
 		ret = -EPIPE;
-		raw_spin_unlock(&logbuf_lock);
+		raw_spin_unlock_irq(&logbuf_lock);
 		goto out;
 	}
 
@@ -465,7 +469,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 	for (i = 0; i < msg->text_len; i++) {
 		unsigned char c = log_text(msg)[i];
 
-		if (c < ' ' || c >= 128)
+		if (c < ' ' || c >= 127 || c == '\\')
 			len += sprintf(user->buf + len, "\\x%02x", c);
 		else
 			user->buf[len++] = c;
@@ -489,7 +493,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 			continue;
 		}
 
-		if (c < ' ' || c >= 128) {
+		if (c < ' ' || c >= 127 || c == '\\') {
 			len += sprintf(user->buf + len, "\\x%02x", c);
 			continue;
 		}
@@ -501,7 +505,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 
 	user->idx = log_next(user->idx);
 	user->seq++;
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	if (len > count) {
 		ret = -EINVAL;
@@ -528,7 +532,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
 	if (offset)
 		return -ESPIPE;
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	switch (whence) {
 	case SEEK_SET:
 		/* the first record */
@@ -552,7 +556,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
 	default:
 		ret = -EINVAL;
 	}
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 	return ret;
 }
 
@@ -566,14 +570,14 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &log_wait, wait);
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	if (user->seq < log_next_seq) {
 		/* return error when data has vanished underneath us */
 		if (user->seq < log_first_seq)
 			ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
 		ret = POLLIN|POLLRDNORM;
 	}
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	return ret;
 }
@@ -597,10 +601,10 @@ static int devkmsg_open(struct inode *inode, struct file *file)
 
 	mutex_init(&user->lock);
 
-	raw_spin_lock(&logbuf_lock);
+	raw_spin_lock_irq(&logbuf_lock);
 	user->idx = log_first_idx;
 	user->seq = log_first_seq;
-	raw_spin_unlock(&logbuf_lock);
+	raw_spin_unlock_irq(&logbuf_lock);
 
 	file->private_data = user;
 	return 0;
@@ -818,15 +822,18 @@ static size_t print_time(u64 ts, char *buf)
 static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
 {
 	size_t len = 0;
+	unsigned int prefix = (msg->facility << 3) | msg->level;
 
 	if (syslog) {
 		if (buf) {
-			len += sprintf(buf, "<%u>", msg->level);
+			len += sprintf(buf, "<%u>", prefix);
 		} else {
 			len += 3;
-			if (msg->level > 9)
-				len++;
-			if (msg->level > 99)
+			if (prefix > 999)
+				len += 3;
+			else if (prefix > 99)
+				len += 2;
+			else if (prefix > 9)
 				len++;
 		}
 	}
@@ -835,13 +842,26 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
 	return len;
 }
 
-static size_t msg_print_text(const struct log *msg, bool syslog,
-			     char *buf, size_t size)
+static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+			     bool syslog, char *buf, size_t size)
 {
 	const char *text = log_text(msg);
 	size_t text_size = msg->text_len;
+	bool prefix = true;
+	bool newline = true;
 	size_t len = 0;
 
+	if ((prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))
+		prefix = false;
+
+	if (msg->flags & LOG_CONT) {
+		if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
+			prefix = false;
+
+		if (!(msg->flags & LOG_NEWLINE))
+			newline = false;
+	}
+
 	do {
 		const char *next = memchr(text, '\n', text_size);
 		size_t text_len;
@@ -859,16 +879,22 @@ static size_t msg_print_text(const struct log *msg, bool syslog,
 			    text_len + 1 >= size - len)
 				break;
 
-			len += print_prefix(msg, syslog, buf + len);
+			if (prefix)
+				len += print_prefix(msg, syslog, buf + len);
 			memcpy(buf + len, text, text_len);
 			len += text_len;
-			buf[len++] = '\n';
+			if (next || newline)
+				buf[len++] = '\n';
 		} else {
 			/* SYSLOG_ACTION_* buffer size only calculation */
-			len += print_prefix(msg, syslog, NULL);
-			len += text_len + 1;
+			if (prefix)
+				len += print_prefix(msg, syslog, NULL);
+			len += text_len;
+			if (next || newline)
+				len++;
 		}
 
+		prefix = true;
 		text = next;
 	} while (text);
 
@@ -887,22 +913,35 @@ static int syslog_print(char __user *buf, int size)
 
 	while (size > 0) {
 		size_t n;
+		size_t skip;
 
 		raw_spin_lock_irq(&logbuf_lock);
 		if (syslog_seq < log_first_seq) {
 			/* messages are gone, move to first one */
 			syslog_seq = log_first_seq;
 			syslog_idx = log_first_idx;
+			syslog_prev = 0;
+			syslog_partial = 0;
 		}
 		if (syslog_seq == log_next_seq) {
 			raw_spin_unlock_irq(&logbuf_lock);
 			break;
 		}
+
+		skip = syslog_partial;
 		msg = log_from_idx(syslog_idx);
-		n = msg_print_text(msg, true, text, LOG_LINE_MAX);
-		if (n <= size) {
+		n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX);
+		if (n - syslog_partial <= size) {
+			/* message fits into buffer, move forward */
 			syslog_idx = log_next(syslog_idx);
 			syslog_seq++;
+			syslog_prev = msg->flags;
+			n -= syslog_partial;
+			syslog_partial = 0;
+		} else if (!len){
+			/* partial read(), remember position */
+			n = size;
+			syslog_partial += n;
 		} else
 			n = 0;
 		raw_spin_unlock_irq(&logbuf_lock);
@@ -910,17 +949,15 @@ static int syslog_print(char __user *buf, int size)
 		if (!n)
 			break;
 
-		len += n;
-		size -= n;
-		buf += n;
-		n = copy_to_user(buf - n, text, n);
-
-		if (n) {
-			len -= n;
+		if (copy_to_user(buf, text + skip, n)) {
 			if (!len)
 				len = -EFAULT;
 			break;
 		}
+
+		len += n;
+		size -= n;
+		buf += n;
 	}
 
 	kfree(text);
@@ -941,6 +978,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 	u64 next_seq;
 	u64 seq;
 	u32 idx;
+	enum log_flags prev;
 
 	if (clear_seq < log_first_seq) {
 		/* messages are gone, move to first available one */
@@ -954,10 +992,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 	 */
 	seq = clear_seq;
 	idx = clear_idx;
+	prev = 0;
 	while (seq < log_next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		len += msg_print_text(msg, true, NULL, 0);
+		len += msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
 	}
@@ -965,10 +1004,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 	/* move first record forward until length fits into the buffer */
 	seq = clear_seq;
 	idx = clear_idx;
+	prev = 0;
 	while (len > size && seq < log_next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		len -= msg_print_text(msg, true, NULL, 0);
+		len -= msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
 	}
@@ -977,17 +1017,19 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 	next_seq = log_next_seq;
 
 	len = 0;
+	prev = 0;
 	while (len >= 0 && seq < next_seq) {
 		struct log *msg = log_from_idx(idx);
 		int textlen;
 
-		textlen = msg_print_text(msg, true, text, LOG_LINE_MAX);
+		textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX);
 		if (textlen < 0) {
 			len = textlen;
 			break;
 		}
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 
 		raw_spin_unlock_irq(&logbuf_lock);
 		if (copy_to_user(buf + len, text, textlen))
@@ -1000,6 +1042,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 				/* messages are gone, move to next one */
 				seq = log_first_seq;
 				idx = log_first_idx;
+				prev = 0;
 			}
 		}
 	}
@@ -1018,7 +1061,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
 	bool clear = false;
 	static int saved_console_loglevel = -1;
-	static DEFINE_MUTEX(syslog_mutex);
 	int error;
 
 	error = check_syslog_permissions(type, from_file);
@@ -1045,17 +1087,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			error = -EFAULT;
 			goto out;
 		}
-		error = mutex_lock_interruptible(&syslog_mutex);
-		if (error)
-			goto out;
 		error = wait_event_interruptible(log_wait,
 						 syslog_seq != log_next_seq);
-		if (error) {
-			mutex_unlock(&syslog_mutex);
+		if (error)
 			goto out;
-		}
 		error = syslog_print(buf, len);
-		mutex_unlock(&syslog_mutex);
 		break;
 	/* Read/clear last kernel messages */
 	case SYSLOG_ACTION_READ_CLEAR:
@@ -1111,6 +1147,8 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			/* messages are gone, move to first one */
 			syslog_seq = log_first_seq;
 			syslog_idx = log_first_idx;
+			syslog_prev = 0;
+			syslog_partial = 0;
 		}
 		if (from_file) {
 			/*
@@ -1120,19 +1158,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			 */
 			error = log_next_idx - syslog_idx;
 		} else {
-			u64 seq;
-			u32 idx;
+			u64 seq = syslog_seq;
+			u32 idx = syslog_idx;
+			enum log_flags prev = syslog_prev;
 
 			error = 0;
-			seq = syslog_seq;
-			idx = syslog_idx;
 			while (seq < log_next_seq) {
 				struct log *msg = log_from_idx(idx);
 
-				error += msg_print_text(msg, true, NULL, 0);
+				error += msg_print_text(msg, prev, true, NULL, 0);
 				idx = log_next(idx);
 				seq++;
+				prev = msg->flags;
 			}
+			error -= syslog_partial;
 		}
 		raw_spin_unlock_irq(&logbuf_lock);
 		break;
@@ -1400,10 +1439,9 @@ asmlinkage int vprintk_emit(int facility, int level,
 	static char textbuf[LOG_LINE_MAX];
 	char *text = textbuf;
 	size_t text_len;
+	enum log_flags lflags = 0;
 	unsigned long flags;
 	int this_cpu;
-	bool newline = false;
-	bool prefix = false;
 	int printed_len = 0;
 
 	boot_delay_msec();
@@ -1442,7 +1480,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		recursion_bug = 0;
 		printed_len += strlen(recursion_msg);
 		/* emit KERN_CRIT message */
-		log_store(0, 2, LOG_DEFAULT, 0,
+		log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
 			  NULL, 0, recursion_msg, printed_len);
 	}
 
@@ -1455,7 +1493,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	/* mark and strip a trailing newline */
 	if (text_len && text[text_len-1] == '\n') {
 		text_len--;
-		newline = true;
+		lflags |= LOG_NEWLINE;
 	}
 
 	/* strip syslog prefix and extract log level or control flags */
@@ -1465,7 +1503,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		if (level == -1)
 			level = text[1] - '0';
 	case 'd':	/* KERN_DEFAULT */
-		prefix = true;
+		lflags |= LOG_PREFIX;
 	case 'c':	/* KERN_CONT */
 		text += 3;
 		text_len -= 3;
@@ -1475,22 +1513,20 @@ asmlinkage int vprintk_emit(int facility, int level,
 	if (level == -1)
 		level = default_message_loglevel;
 
-	if (dict) {
-		prefix = true;
-		newline = true;
-	}
+	if (dict)
+		lflags |= LOG_PREFIX|LOG_NEWLINE;
 
-	if (!newline) {
+	if (!(lflags & LOG_NEWLINE)) {
 		/*
 		 * Flush the conflicting buffer. An earlier newline was missing,
 		 * or another task also prints continuation lines.
 		 */
-		if (cont.len && (prefix || cont.owner != current))
+		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
			cont_flush();
 
 		/* buffer line if possible, otherwise store it right away */
 		if (!cont_add(facility, level, text, text_len))
-			log_store(facility, level, LOG_DEFAULT, 0,
+			log_store(facility, level, lflags | LOG_CONT, 0,
 				  dict, dictlen, text, text_len);
 	} else {
 		bool stored = false;
@@ -1502,13 +1538,13 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 * flush it out and store this line separately.
 		 */
 		if (cont.len && cont.owner == current) {
-			if (!prefix)
+			if (!(lflags & LOG_PREFIX))
 				stored = cont_add(facility, level, text, text_len);
 			cont_flush();
 		}
 
 		if (!stored)
-			log_store(facility, level, LOG_DEFAULT, 0,
+			log_store(facility, level, lflags, 0,
 				  dict, dictlen, text, text_len);
 	}
 	printed_len += text_len;
@@ -1607,8 +1643,8 @@ static struct cont {
 static struct log *log_from_idx(u32 idx) { return NULL; }
 static u32 log_next(u32 idx) { return 0; }
 static void call_console_drivers(int level, const char *text, size_t len) {}
-static size_t msg_print_text(const struct log *msg, bool syslog,
-			     char *buf, size_t size) { return 0; }
+static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+			     bool syslog, char *buf, size_t size) { return 0; }
 static size_t cont_print_text(char *text, size_t size) { return 0; }
 
 #endif /* CONFIG_PRINTK */
@@ -1884,6 +1920,7 @@ void wake_up_klogd(void)
 /* the next printk record to write to the console */
 static u64 console_seq;
 static u32 console_idx;
+static enum log_flags console_prev;
 
 /**
 * console_unlock - unlock the console system
@@ -1944,6 +1981,7 @@ again:
 			/* messages are gone, move to first one */
 			console_seq = log_first_seq;
 			console_idx = log_first_idx;
+			console_prev = 0;
 		}
 skip:
 		if (console_seq == log_next_seq)
@@ -1957,14 +1995,21 @@ skip:
 			 */
 			console_idx = log_next(console_idx);
 			console_seq++;
+			/*
+			 * We will get here again when we register a new
+			 * CON_PRINTBUFFER console. Clear the flag so we
+			 * will properly dump everything later.
+			 */
+			msg->flags &= ~LOG_NOCONS;
 			goto skip;
 		}
 
 		level = msg->level;
-		len = msg_print_text(msg, false, text, sizeof(text));
-
+		len = msg_print_text(msg, console_prev, false,
+				     text, sizeof(text));
 		console_idx = log_next(console_idx);
 		console_seq++;
+		console_prev = msg->flags;
 		raw_spin_unlock(&logbuf_lock);
 
 		stop_critical_timings();	/* don't trace print latency */
@@ -2227,6 +2272,7 @@ void register_console(struct console *newcon)
 		raw_spin_lock_irqsave(&logbuf_lock, flags);
 		console_seq = syslog_seq;
 		console_idx = syslog_idx;
+		console_prev = syslog_prev;
 		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 		/*
 		 * We're about to replay the log buffer. Only do this to the
@@ -2520,8 +2566,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
 	}
 
 	msg = log_from_idx(dumper->cur_idx);
-	l = msg_print_text(msg, syslog,
-			   line, size);
+	l = msg_print_text(msg, 0, syslog, line, size);
 
 	dumper->cur_idx = log_next(dumper->cur_idx);
 	dumper->cur_seq++;
@@ -2561,6 +2606,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	u32 idx;
 	u64 next_seq;
 	u32 next_idx;
+	enum log_flags prev;
 	size_t l = 0;
 	bool ret = false;
 
@@ -2583,23 +2629,27 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	/* calculate length of entire buffer */
 	seq = dumper->cur_seq;
 	idx = dumper->cur_idx;
+	prev = 0;
 	while (seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l += msg_print_text(msg, true, NULL, 0);
+		l += msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	/* move first record forward until length fits into the buffer */
 	seq = dumper->cur_seq;
 	idx = dumper->cur_idx;
+	prev = 0;
 	while (l > size && seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l -= msg_print_text(msg, true, NULL, 0);
+		l -= msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	/* last message in next interation */
@@ -2607,14 +2657,14 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
 	next_idx = idx;
 
 	l = 0;
+	prev = 0;
 	while (seq < dumper->next_seq) {
 		struct log *msg = log_from_idx(idx);
 
-		l += msg_print_text(msg, syslog,
-				    buf + l, size - l);
-
+		l += msg_print_text(msg, prev, syslog, buf + l, size - l);
 		idx = log_next(idx);
 		seq++;
+		prev = msg->flags;
 	}
 
 	dumper->next_seq = next_seq;
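
The heart of the printk change is the pair of decisions msg_print_text() now derives from the previous record's flags: whether this record gets a syslog prefix and whether it ends with a newline. The program below lifts exactly that logic from the diff into a standalone, compilable form so the continuation cases can be exercised in isolation; only the scaffolding around it is invented:

    #include <stdbool.h>
    #include <stdio.h>

    enum log_flags {
            LOG_NOCONS  = 1,
            LOG_NEWLINE = 2,
            LOG_PREFIX  = 4,
            LOG_CONT    = 8,
    };

    /* Mirrors the flag checks at the top of msg_print_text(). */
    static void cont_decisions(int prev, int flags, bool *prefix, bool *newline)
    {
            *prefix = true;
            *newline = true;

            if ((prev & LOG_CONT) && !(flags & LOG_PREFIX))
                    *prefix = false;

            if (flags & LOG_CONT) {
                    if ((prev & LOG_CONT) && !(prev & LOG_NEWLINE))
                            *prefix = false;
                    if (!(flags & LOG_NEWLINE))
                            *newline = false;
            }
    }

    int main(void)
    {
            bool p, n;

            /* middle fragment of a continuation line: no prefix, no newline */
            cont_decisions(LOG_CONT, LOG_CONT, &p, &n);
            printf("prefix=%d newline=%d\n", p, n);
            return 0;
    }

This is also why syslog_prev, console_prev, and the local prev variables are threaded through every reader above: each consumer of the ring buffer must remember the flags of the record it printed last.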
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 38ecdda3f55f..4b97bba7396e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -201,6 +201,7 @@ void rcu_note_context_switch(int cpu)
 {
 	trace_rcu_utilization("Start context switch");
 	rcu_sched_qs(cpu);
+	rcu_preempt_note_context_switch(cpu);
 	trace_rcu_utilization("End context switch");
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index ea056495783e..19b61ac1079f 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -444,6 +444,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
 long rcu_batches_completed(void);
+static void rcu_preempt_note_context_switch(int cpu);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5271a020887e..3e4899459f3d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu)
  *
  * Caller must disable preemption.
  */
-void rcu_preempt_note_context_switch(void)
+static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
@@ -164,7 +164,7 @@ void rcu_preempt_note_context_switch(void)
 	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
 		/* Possibly blocking in an RCU read-side critical section. */
-		rdp = __this_cpu_ptr(rcu_preempt_state.rda);
+		rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
 		rnp = rdp->mynode;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -228,7 +228,7 @@ void rcu_preempt_note_context_switch(void)
 	 * means that we continue to block the current grace period.
 	 */
 	local_irq_save(flags);
-	rcu_preempt_qs(smp_processor_id());
+	rcu_preempt_qs(cpu);
 	local_irq_restore(flags);
 }
 
@@ -1002,6 +1002,14 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
+ * Because preemptible RCU does not exist, we never have to check for
+ * CPUs being in quiescent states.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+}
+
+/*
 * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers.
 */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d5594a4268d4..468bdd44c1ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
 	/* Here we just switch the register state and the stack. */
-	rcu_switch_from(prev);
 	switch_to(prev, next, prev);
 
 	barrier();
@@ -2161,11 +2160,73 @@ unsigned long this_cpu_load(void)
 }
 
 
+/*
+ * Global load-average calculations
+ *
+ * We take a distributed and async approach to calculating the global load-avg
+ * in order to minimize overhead.
+ *
+ * The global load average is an exponentially decaying average of nr_running +
+ * nr_uninterruptible.
+ *
+ * Once every LOAD_FREQ:
+ *
+ *   nr_active = 0;
+ *   for_each_possible_cpu(cpu)
+ *	nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
+ *
+ *   avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n)
+ *
+ * Due to a number of reasons the above turns in the mess below:
+ *
+ *  - for_each_possible_cpu() is prohibitively expensive on machines with
+ *    serious number of cpus, therefore we need to take a distributed approach
+ *    to calculating nr_active.
+ *
+ *        \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
+ *                      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
+ *
+ *    So assuming nr_active := 0 when we start out -- true per definition, we
+ *    can simply take per-cpu deltas and fold those into a global accumulate
+ *    to obtain the same result. See calc_load_fold_active().
+ *
+ *    Furthermore, in order to avoid synchronizing all per-cpu delta folding
+ *    across the machine, we assume 10 ticks is sufficient time for every
+ *    cpu to have completed this task.
+ *
+ *    This places an upper-bound on the IRQ-off latency of the machine. Then
+ *    again, being late doesn't loose the delta, just wrecks the sample.
+ *
+ *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because
+ *    this would add another cross-cpu cacheline miss and atomic operation
+ *    to the wakeup path. Instead we increment on whatever cpu the task ran
+ *    when it went into uninterruptible state and decrement on whatever cpu
+ *    did the wakeup. This means that only the sum of nr_uninterruptible over
+ *    all cpus yields the correct result.
+ *
+ * This covers the NO_HZ=n code, for extra head-aches, see the comment below.
+ */
+
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
 unsigned long avenrun[3];
-EXPORT_SYMBOL(avenrun);
+EXPORT_SYMBOL(avenrun); /* should be removed */
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
 
 static long calc_load_fold_active(struct rq *this_rq)
 {
@@ -2182,6 +2243,9 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
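
The "a1 = a0 * e + a * (1 - e)" comment is the whole algorithm in one line: avenrun[] holds fixed-point values with FSHIFT bits of fraction, and each LOAD_FREQ sample decays the old average by e and blends in the current nr_active. Below is a hedged, compilable sketch of that arithmetic; the constants match my reading of include/linux/sched.h for this era but should be treated as illustrative, and the kernel's actual calc_load() body is not shown in this hunk:

    #include <stdio.h>

    #define FSHIFT  11
    #define FIXED_1 (1 << FSHIFT)   /* 1.0 in fixed point == 2048 */
    #define EXP_1   1884            /* ~1/exp(5s/1min) in fixed point */

    /* One a1 = a0 * e + a * (1 - e) step, in FSHIFT fixed point. */
    static unsigned long calc_load(unsigned long load, unsigned long exp,
                                   unsigned long active)
    {
            load *= exp;
            load += active * (FIXED_1 - exp);
            return load >> FSHIFT;
    }

    int main(void)
    {
            /* 1-minute average starting at 0.00 with 3 runnable tasks */
            unsigned long avg = 0, active = 3 * FIXED_1;
            int i;

            for (i = 0; i < 12; i++)        /* one minute of 5s samples */
                    avg = calc_load(avg, EXP_1, active);
            printf("load after 1 min: %lu.%02lu\n",
                   avg >> FSHIFT, ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);
            return 0;
    }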
@@ -2193,30 +2257,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 
 #ifdef CONFIG_NO_HZ
 /*
- * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
+ * Handle NO_HZ for the global load-average.
+ *
+ * Since the above described distributed algorithm to compute the global
+ * load-average relies on per-cpu sampling from the tick, it is affected by
+ * NO_HZ.
+ *
+ * The basic idea is to fold the nr_active delta into a global idle-delta upon
+ * entering NO_HZ state such that we can include this as an 'extra' cpu delta
+ * when we read the global state.
+ *
+ * Obviously reality has to ruin such a delightfully simple scheme:
+ *
+ *  - When we go NO_HZ idle during the window, we can negate our sample
+ *    contribution, causing under-accounting.
+ *
+ *    We avoid this by keeping two idle-delta counters and flipping them
+ *    when the window starts, thus separating old and new NO_HZ load.
+ *
+ *    The only trick is the slight shift in index flip for read vs write.
+ *
+ *        0s            5s            10s           15s
+ *          +10           +10           +10           +10
+ *        |-|-----------|-|-----------|-|-----------|-|
+ *    r:0 0 1           1 0           0 1           1 0
+ *    w:0 1 1           0 0           1 1           0 0
+ *
+ *    This ensures we'll fold the old idle contribution in this window while
+ *    accumlating the new one.
+ *
+ *  - When we wake up from NO_HZ idle during the window, we push up our
+ *    contribution, since we effectively move our sample point to a known
+ *    busy state.
+ *
+ *    This is solved by pushing the window forward, and thus skipping the
+ *    sample, for this cpu (effectively using the idle-delta for this cpu which
+ *    was in effect at the time the window opened). This also solves the issue
+ *    of having to deal with a cpu having been in NOHZ idle for multiple
+ *    LOAD_FREQ intervals.
 *
 * When making the ILB scale, we should try to pull this in as well.
 */
-static atomic_long_t calc_load_tasks_idle;
+static atomic_long_t calc_load_idle[2];
+static int calc_load_idx;
 
-void calc_load_account_idle(struct rq *this_rq)
+static inline int calc_load_write_idx(void)
 {
+	int idx = calc_load_idx;
+
+	/*
+	 * See calc_global_nohz(), if we observe the new index, we also
+	 * need to observe the new update time.
+	 */
+	smp_rmb();
+
+	/*
+	 * If the folding window started, make sure we start writing in the
+	 * next idle-delta.
+	 */
+	if (!time_before(jiffies, calc_load_update))
+		idx++;
+
+	return idx & 1;
+}
+
+static inline int calc_load_read_idx(void)
+{
+	return calc_load_idx & 1;
+}
+
+void calc_load_enter_idle(void)
+{
+	struct rq *this_rq = this_rq();
 	long delta;
 
+	/*
+	 * We're going into NOHZ mode, if there's any pending delta, fold it
+	 * into the pending idle delta.
+	 */
 	delta = calc_load_fold_active(this_rq);
-	if (delta)
-		atomic_long_add(delta, &calc_load_tasks_idle);
+	if (delta) {
+		int idx = calc_load_write_idx();
+		atomic_long_add(delta, &calc_load_idle[idx]);
+	}
 }
 
-static long calc_load_fold_idle(void)
+void calc_load_exit_idle(void)
 {
-	long delta = 0;
+	struct rq *this_rq = this_rq();
+
+	/*
+	 * If we're still before the sample window, we're done.
+	 */
+	if (time_before(jiffies, this_rq->calc_load_update))
+		return;
 
 	/*
-	 * Its got a race, we don't care...
+	 * We woke inside or after the sample window, this means we're already
+	 * accounted through the nohz accounting, so skip the entire deal and
+	 * sync up for the next window.
 	 */
-	if (atomic_long_read(&calc_load_tasks_idle))
-		delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
+	this_rq->calc_load_update = calc_load_update;
+	if (time_before(jiffies, this_rq->calc_load_update + 10))
+		this_rq->calc_load_update += LOAD_FREQ;
+}
+
+static long calc_load_fold_idle(void)
+{
+	int idx = calc_load_read_idx();
+	long delta = 0;
+
+	if (atomic_long_read(&calc_load_idle[idx]))
+		delta = atomic_long_xchg(&calc_load_idle[idx], 0);
 
 	return delta;
 }
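
The read/write index trick in calc_load_write_idx()/calc_load_read_idx() is easiest to see with concrete numbers: a cpu that enters NO_HZ after the window opens deposits its delta into the next bucket, so the reader folds only deltas belonging to the window being closed. A toy single-threaded model of that logic, with no jiffies and no barriers, where window_open stands in for the time_before() test:

    #include <stdio.h>

    static long calc_load_idle[2];
    static int calc_load_idx;
    static int window_open;

    static int calc_load_write_idx(void)
    {
            int idx = calc_load_idx;

            /* if the folding window started, write into the next bucket */
            if (window_open)
                    idx++;
            return idx & 1;
    }

    static int calc_load_read_idx(void)
    {
            return calc_load_idx & 1;
    }

    int main(void)
    {
            calc_load_idle[calc_load_write_idx()] += 2;     /* old window */
            window_open = 1;
            calc_load_idle[calc_load_write_idx()] += 5;     /* new window */

            /* the reader folds only the old window's delta */
            printf("fold %ld, pending %ld\n",
                   calc_load_idle[calc_load_read_idx()],
                   calc_load_idle[calc_load_write_idx()]);
            calc_load_idx++;                                /* flip */
            return 0;
    }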
@@ -2302,66 +2454,39 @@ static void calc_global_nohz(void) | |||
2302 | { | 2454 | { |
2303 | long delta, active, n; | 2455 | long delta, active, n; |
2304 | 2456 | ||
2305 | /* | 2457 | if (!time_before(jiffies, calc_load_update + 10)) { |
2306 | * If we crossed a calc_load_update boundary, make sure to fold | 2458 | /* |
2307 | * any pending idle changes, the respective CPUs might have | 2459 | * Catch-up, fold however many we are behind still |
2308 | * missed the tick driven calc_load_account_active() update | 2460 | */ |
2309 | * due to NO_HZ. | 2461 | delta = jiffies - calc_load_update - 10; |
2310 | */ | 2462 | n = 1 + (delta / LOAD_FREQ); |
2311 | delta = calc_load_fold_idle(); | ||
2312 | if (delta) | ||
2313 | atomic_long_add(delta, &calc_load_tasks); | ||
2314 | |||
2315 | /* | ||
2316 | * It could be that one fold was all it took; we're done! | ||
2317 | */ | ||
2318 | if (time_before(jiffies, calc_load_update + 10)) | ||
2319 | return; | ||
2320 | |||
2321 | /* | ||
2322 | * Catch up: fold however many windows we are still behind. | ||
2323 | */ | ||
2324 | delta = jiffies - calc_load_update - 10; | ||
2325 | n = 1 + (delta / LOAD_FREQ); | ||
2326 | 2463 | ||
2327 | active = atomic_long_read(&calc_load_tasks); | 2464 | active = atomic_long_read(&calc_load_tasks); |
2328 | active = active > 0 ? active * FIXED_1 : 0; | 2465 | active = active > 0 ? active * FIXED_1 : 0; |
2329 | 2466 | ||
2330 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | 2467 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); |
2331 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | 2468 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); |
2332 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | 2469 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); |
2333 | 2470 | ||
2334 | calc_load_update += n * LOAD_FREQ; | 2471 | calc_load_update += n * LOAD_FREQ; |
2335 | } | 2472 | } |
2336 | #else | ||
2337 | void calc_load_account_idle(struct rq *this_rq) | ||
2338 | { | ||
2339 | } | ||
2340 | 2473 | ||
2341 | static inline long calc_load_fold_idle(void) | 2474 | /* |
2342 | { | 2475 | * Flip the idle index... |
2343 | return 0; | 2476 | * |
2477 | * Make sure we first write the new time and then flip the index, so | ||
2478 | * that calc_load_write_idx() sees the new time when it reads the new | ||
2479 | * index; this avoids a double flip messing things up. | ||
2480 | */ | ||
2481 | smp_wmb(); | ||
2482 | calc_load_idx++; | ||
2344 | } | 2483 | } |
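This smp_wmb() pairs with the smp_rmb() in calc_load_write_idx(). A minimal model of the pairing, with C11 fences standing in for the kernel barriers and invented names: the writer publishes the new window start before the new index, the reader loads them in the opposite order, so a reader that observes the new index is guaranteed to observe the new window start and will not flip a second time.

#include <stdatomic.h>

static atomic_ulong window_start;	/* calc_load_update stand-in */
static atomic_int   idx;		/* calc_load_idx stand-in */

static void flip(unsigned long next_window)	/* calc_global_nohz() side */
{
	atomic_store_explicit(&window_start, next_window,
			      memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* smp_wmb() */
	atomic_fetch_add_explicit(&idx, 1, memory_order_relaxed);
}

static int writer_slot(unsigned long now)	/* calc_load_write_idx() side */
{
	int i = atomic_load_explicit(&idx, memory_order_relaxed);

	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() */

	/* Seeing the new idx implies seeing the new window_start. */
	if (now >= atomic_load_explicit(&window_start, memory_order_relaxed))
		i++;

	return i & 1;
}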
2484 | #else /* !CONFIG_NO_HZ */ | ||
2345 | 2485 | ||
2346 | static void calc_global_nohz(void) | 2486 | static inline long calc_load_fold_idle(void) { return 0; } |
2347 | { | 2487 | static inline void calc_global_nohz(void) { } |
2348 | } | ||
2349 | #endif | ||
2350 | 2488 | ||
2351 | /** | 2489 | #endif /* CONFIG_NO_HZ */ |
2352 | * get_avenrun - get the load average array | ||
2353 | * @loads: pointer to dest load array | ||
2354 | * @offset: offset to add | ||
2355 | * @shift: shift count to shift the result left | ||
2356 | * | ||
2357 | * These values are estimates at best, so no need for locking. | ||
2358 | */ | ||
2359 | void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | ||
2360 | { | ||
2361 | loads[0] = (avenrun[0] + offset) << shift; | ||
2362 | loads[1] = (avenrun[1] + offset) << shift; | ||
2363 | loads[2] = (avenrun[2] + offset) << shift; | ||
2364 | } | ||
2365 | 2490 | ||
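Wherever get_avenrun() lands after this shuffle, its main consumer is /proc/loadavg. A sketch of that usage, with LOAD_INT()/LOAD_FRAC() as in fs/proc/loadavg.c and FSHIFT/FIXED_1 from <linux/sched.h> (loadavg_show() is a name invented here): the fixed-point averages are split into an integer part and a two-digit fraction, with FIXED_1/200 added to round the fraction.

#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

static int loadavg_show(void)
{
	unsigned long avnrun[3];

	/* FIXED_1/200 == 0.005 in fixed point, rounds the 2-digit fraction */
	get_avenrun(avnrun, FIXED_1 / 200, 0);

	printk("%lu.%02lu %lu.%02lu %lu.%02lu\n",
	       LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
	       LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
	       LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]));
	return 0;
}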
2366 | /* | 2491 | /* |
2367 | * calc_load - update the avenrun load estimates 10 ticks after the | 2492 | * calc_load - update the avenrun load estimates 10 ticks after the |
@@ -2369,11 +2494,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
2369 | */ | 2494 | */ |
2370 | void calc_global_load(unsigned long ticks) | 2495 | void calc_global_load(unsigned long ticks) |
2371 | { | 2496 | { |
2372 | long active; | 2497 | long active, delta; |
2373 | 2498 | ||
2374 | if (time_before(jiffies, calc_load_update + 10)) | 2499 | if (time_before(jiffies, calc_load_update + 10)) |
2375 | return; | 2500 | return; |
2376 | 2501 | ||
2502 | /* | ||
2503 | * Fold the 'old' idle-delta to include all NO_HZ cpus. | ||
2504 | */ | ||
2505 | delta = calc_load_fold_idle(); | ||
2506 | if (delta) | ||
2507 | atomic_long_add(delta, &calc_load_tasks); | ||
2508 | |||
2377 | active = atomic_long_read(&calc_load_tasks); | 2509 | active = atomic_long_read(&calc_load_tasks); |
2378 | active = active > 0 ? active * FIXED_1 : 0; | 2510 | active = active > 0 ? active * FIXED_1 : 0; |
2379 | 2511 | ||
@@ -2384,12 +2516,7 @@ void calc_global_load(unsigned long ticks) | |||
2384 | calc_load_update += LOAD_FREQ; | 2516 | calc_load_update += LOAD_FREQ; |
2385 | 2517 | ||
2386 | /* | 2518 | /* |
2387 | * Account one period with whatever state we found before | 2519 | * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. |
2388 | * folding in the nohz state and ageing the entire idle period. | ||
2389 | * | ||
2390 | * This avoids losing a sample when we go idle between | ||
2391 | * calc_load_account_active() (10 ticks ago) and now and thus | ||
2392 | * under-accounting. | ||
2393 | */ | 2520 | */ |
2394 | calc_global_nohz(); | 2521 | calc_global_nohz(); |
2395 | } | 2522 | } |
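For reference, the averaging these samples feed: avenrun[] is an exponentially decaying average kept in 11-bit fixed point. A sketch of calc_load() plus the n-window catch-up; the kernel's calc_load_n() computes exp^n with an O(log n) fixed-point power (fixed_power_int()), and the loop below is the slow but obvious equivalent.

#define FSHIFT	11			/* bits of fraction */
#define FIXED_1	(1 << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1	1884			/* FIXED_1 / exp(5s / 1min) */

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;				/* decay the old average */
	load += active * (FIXED_1 - exp);	/* blend in the new sample */
	return load >> FSHIFT;
}

/* Catch up on n missed windows by iterating the one-window update. */
static unsigned long calc_load_n_slow(unsigned long load, unsigned long exp,
				      unsigned long active, unsigned int n)
{
	while (n--)
		load = calc_load(load, exp, active);
	return load;
}

With active constant across the missed windows (everything was idle), iterating the one-window update n times is exactly the bulk catch-up calc_global_nohz() performs.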
@@ -2406,7 +2533,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
2406 | return; | 2533 | return; |
2407 | 2534 | ||
2408 | delta = calc_load_fold_active(this_rq); | 2535 | delta = calc_load_fold_active(this_rq); |
2409 | delta += calc_load_fold_idle(); | ||
2410 | if (delta) | 2536 | if (delta) |
2411 | atomic_long_add(delta, &calc_load_tasks); | 2537 | atomic_long_add(delta, &calc_load_tasks); |
2412 | 2538 | ||
@@ -2414,6 +2540,10 @@ static void calc_load_account_active(struct rq *this_rq) | |||
2414 | } | 2540 | } |
2415 | 2541 | ||
2416 | /* | 2542 | /* |
2543 | * End of global load-average stuff | ||
2544 | */ | ||
2545 | |||
2546 | /* | ||
2417 | * The exact cpuload at various idx values, calculated at every tick would be | 2547 | * The exact cpuload at various idx values, calculated at every tick would be |
2418 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load | 2548 | * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load |
2419 | * | 2549 | * |
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b44d604b35d1..b6baf370cae9 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c | |||
@@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl | |||
25 | static struct task_struct *pick_next_task_idle(struct rq *rq) | 25 | static struct task_struct *pick_next_task_idle(struct rq *rq) |
26 | { | 26 | { |
27 | schedstat_inc(rq, sched_goidle); | 27 | schedstat_inc(rq, sched_goidle); |
28 | calc_load_account_idle(rq); | ||
29 | return rq->idle; | 28 | return rq->idle; |
30 | } | 29 | } |
31 | 30 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6d52cea7f33d..55844f24435a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void) | |||
942 | return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; | 942 | return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; |
943 | } | 943 | } |
944 | 944 | ||
945 | void calc_load_account_idle(struct rq *this_rq); | ||
946 | |||
947 | #ifdef CONFIG_SCHED_HRTICK | 945 | #ifdef CONFIG_SCHED_HRTICK |
948 | 946 | ||
949 | /* | 947 | /* |
diff --git a/kernel/sys.c b/kernel/sys.c index e0c8ffc50d7f..2d39a84cd857 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1788,7 +1788,6 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
1788 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1788 | #ifdef CONFIG_CHECKPOINT_RESTORE |
1789 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | 1789 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) |
1790 | { | 1790 | { |
1791 | struct vm_area_struct *vma; | ||
1792 | struct file *exe_file; | 1791 | struct file *exe_file; |
1793 | struct dentry *dentry; | 1792 | struct dentry *dentry; |
1794 | int err; | 1793 | int err; |
@@ -1816,13 +1815,17 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1816 | down_write(&mm->mmap_sem); | 1815 | down_write(&mm->mmap_sem); |
1817 | 1816 | ||
1818 | /* | 1817 | /* |
1819 | * Forbid mm->exe_file change if there are mapped other files. | 1818 | * Forbid mm->exe_file change if old file still mapped. |
1820 | */ | 1819 | */ |
1821 | err = -EBUSY; | 1820 | err = -EBUSY; |
1822 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 1821 | if (mm->exe_file) { |
1823 | if (vma->vm_file && !path_equal(&vma->vm_file->f_path, | 1822 | struct vm_area_struct *vma; |
1824 | &exe_file->f_path)) | 1823 | |
1825 | goto exit_unlock; | 1824 | for (vma = mm->mmap; vma; vma = vma->vm_next) |
1825 | if (vma->vm_file && | ||
1826 | path_equal(&vma->vm_file->f_path, | ||
1827 | &mm->exe_file->f_path)) | ||
1828 | goto exit_unlock; | ||
1826 | } | 1829 | } |
1827 | 1830 | ||
1828 | /* | 1831 | /* |
@@ -1835,6 +1838,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | |||
1835 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) | 1838 | if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) |
1836 | goto exit_unlock; | 1839 | goto exit_unlock; |
1837 | 1840 | ||
1841 | err = 0; | ||
1838 | set_mm_exe_file(mm, exe_file); | 1842 | set_mm_exe_file(mm, exe_file); |
1839 | exit_unlock: | 1843 | exit_unlock: |
1840 | up_write(&mm->mmap_sem); | 1844 | up_write(&mm->mmap_sem); |
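From userspace this path is reached via prctl(). A quick probe of the new check, assuming CONFIG_CHECKPOINT_RESTORE and sufficient privilege (CAP_SYS_RESOURCE); expect EBUSY while the old executable is still mapped:

#include <stdio.h>
#include <fcntl.h>
#include <sys/prctl.h>

#ifndef PR_SET_MM
#define PR_SET_MM		35
#define PR_SET_MM_EXE_FILE	13
#endif

int main(void)
{
	int fd = open("/bin/true", O_RDONLY);

	if (fd < 0)
		return 1;

	/* Re-point /proc/self/exe; EBUSY while the old image is mapped. */
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, (unsigned long)fd, 0, 0))
		perror("PR_SET_MM_EXE_FILE");

	return 0;
}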
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 869997833928..4a08472c3ca7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -406,6 +406,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) | |||
406 | */ | 406 | */ |
407 | if (!ts->tick_stopped) { | 407 | if (!ts->tick_stopped) { |
408 | select_nohz_load_balancer(1); | 408 | select_nohz_load_balancer(1); |
409 | calc_load_enter_idle(); | ||
409 | 410 | ||
410 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); | 411 | ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); |
411 | ts->tick_stopped = 1; | 412 | ts->tick_stopped = 1; |
@@ -597,6 +598,7 @@ void tick_nohz_idle_exit(void) | |||
597 | account_idle_ticks(ticks); | 598 | account_idle_ticks(ticks); |
598 | #endif | 599 | #endif |
599 | 600 | ||
601 | calc_load_exit_idle(); | ||
600 | touch_softlockup_watchdog(); | 602 | touch_softlockup_watchdog(); |
601 | /* | 603 | /* |
602 | * Cancel the scheduled timer and restore the tick | 604 | * Cancel the scheduled timer and restore the tick |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6f46a00a1e8a..3447cfaf11e7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -70,6 +70,12 @@ struct timekeeper { | |||
70 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ | 70 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ |
71 | struct timespec raw_time; | 71 | struct timespec raw_time; |
72 | 72 | ||
73 | /* Offset clock monotonic -> clock realtime */ | ||
74 | ktime_t offs_real; | ||
75 | |||
76 | /* Offset clock monotonic -> clock boottime */ | ||
77 | ktime_t offs_boot; | ||
78 | |||
73 | /* Seqlock for all timekeeper values */ | 79 | /* Seqlock for all timekeeper values */ |
74 | seqlock_t lock; | 80 | seqlock_t lock; |
75 | }; | 81 | }; |
@@ -172,6 +178,14 @@ static inline s64 timekeeping_get_ns_raw(void) | |||
172 | return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); | 178 | return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); |
173 | } | 179 | } |
174 | 180 | ||
181 | static void update_rt_offset(void) | ||
182 | { | ||
183 | struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; | ||
184 | |||
185 | set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); | ||
186 | timekeeper.offs_real = timespec_to_ktime(tmp); | ||
187 | } | ||
188 | |||
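offs_real encodes CLOCK_REALTIME = CLOCK_MONOTONIC + offs_real, and offs_boot the analogous boottime offset, which is why the value is just wall_to_monotonic negated. A coarse userspace spot-check of that invariant:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec mono, real;

	clock_gettime(CLOCK_MONOTONIC, &mono);
	clock_gettime(CLOCK_REALTIME, &real);

	/* offs_real as the timekeeper maintains it, to second precision */
	printf("offs_real ~= %ld s\n", (long)(real.tv_sec - mono.tv_sec));
	return 0;
}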
175 | /* must hold write on timekeeper.lock */ | 189 | /* must hold write on timekeeper.lock */ |
176 | static void timekeeping_update(bool clearntp) | 190 | static void timekeeping_update(bool clearntp) |
177 | { | 191 | { |
@@ -179,6 +193,7 @@ static void timekeeping_update(bool clearntp) | |||
179 | timekeeper.ntp_error = 0; | 193 | timekeeper.ntp_error = 0; |
180 | ntp_clear(); | 194 | ntp_clear(); |
181 | } | 195 | } |
196 | update_rt_offset(); | ||
182 | update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, | 197 | update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, |
183 | timekeeper.clock, timekeeper.mult); | 198 | timekeeper.clock, timekeeper.mult); |
184 | } | 199 | } |
@@ -604,6 +619,7 @@ void __init timekeeping_init(void) | |||
604 | } | 619 | } |
605 | set_normalized_timespec(&timekeeper.wall_to_monotonic, | 620 | set_normalized_timespec(&timekeeper.wall_to_monotonic, |
606 | -boot.tv_sec, -boot.tv_nsec); | 621 | -boot.tv_sec, -boot.tv_nsec); |
622 | update_rt_offset(); | ||
607 | timekeeper.total_sleep_time.tv_sec = 0; | 623 | timekeeper.total_sleep_time.tv_sec = 0; |
608 | timekeeper.total_sleep_time.tv_nsec = 0; | 624 | timekeeper.total_sleep_time.tv_nsec = 0; |
609 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 625 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
@@ -612,6 +628,12 @@ void __init timekeeping_init(void) | |||
612 | /* time in seconds when suspend began */ | 628 | /* time in seconds when suspend began */ |
613 | static struct timespec timekeeping_suspend_time; | 629 | static struct timespec timekeeping_suspend_time; |
614 | 630 | ||
631 | static void update_sleep_time(struct timespec t) | ||
632 | { | ||
633 | timekeeper.total_sleep_time = t; | ||
634 | timekeeper.offs_boot = timespec_to_ktime(t); | ||
635 | } | ||
636 | |||
615 | /** | 637 | /** |
616 | * __timekeeping_inject_sleeptime - Internal function to add sleep interval | 638 | * __timekeeping_inject_sleeptime - Internal function to add sleep interval |
617 | * @delta: pointer to a timespec delta value | 639 | * @delta: pointer to a timespec delta value |
@@ -630,8 +652,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) | |||
630 | timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); | 652 | timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); |
631 | timekeeper.wall_to_monotonic = | 653 | timekeeper.wall_to_monotonic = |
632 | timespec_sub(timekeeper.wall_to_monotonic, *delta); | 654 | timespec_sub(timekeeper.wall_to_monotonic, *delta); |
633 | timekeeper.total_sleep_time = timespec_add( | 655 | update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); |
634 | timekeeper.total_sleep_time, *delta); | ||
635 | } | 656 | } |
636 | 657 | ||
637 | 658 | ||
@@ -696,6 +717,7 @@ static void timekeeping_resume(void) | |||
696 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 717 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
697 | timekeeper.ntp_error = 0; | 718 | timekeeper.ntp_error = 0; |
698 | timekeeping_suspended = 0; | 719 | timekeeping_suspended = 0; |
720 | timekeeping_update(false); | ||
699 | write_sequnlock_irqrestore(&timekeeper.lock, flags); | 721 | write_sequnlock_irqrestore(&timekeeper.lock, flags); |
700 | 722 | ||
701 | touch_softlockup_watchdog(); | 723 | touch_softlockup_watchdog(); |
@@ -963,6 +985,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
963 | leap = second_overflow(timekeeper.xtime.tv_sec); | 985 | leap = second_overflow(timekeeper.xtime.tv_sec); |
964 | timekeeper.xtime.tv_sec += leap; | 986 | timekeeper.xtime.tv_sec += leap; |
965 | timekeeper.wall_to_monotonic.tv_sec -= leap; | 987 | timekeeper.wall_to_monotonic.tv_sec -= leap; |
988 | if (leap) | ||
989 | clock_was_set_delayed(); | ||
966 | } | 990 | } |
967 | 991 | ||
968 | /* Accumulate raw time */ | 992 | /* Accumulate raw time */ |
@@ -1079,6 +1103,8 @@ static void update_wall_time(void) | |||
1079 | leap = second_overflow(timekeeper.xtime.tv_sec); | 1103 | leap = second_overflow(timekeeper.xtime.tv_sec); |
1080 | timekeeper.xtime.tv_sec += leap; | 1104 | timekeeper.xtime.tv_sec += leap; |
1081 | timekeeper.wall_to_monotonic.tv_sec -= leap; | 1105 | timekeeper.wall_to_monotonic.tv_sec -= leap; |
1106 | if (leap) | ||
1107 | clock_was_set_delayed(); | ||
1082 | } | 1108 | } |
1083 | 1109 | ||
1084 | timekeeping_update(false); | 1110 | timekeeping_update(false); |
@@ -1246,6 +1272,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, | |||
1246 | } while (read_seqretry(&timekeeper.lock, seq)); | 1272 | } while (read_seqretry(&timekeeper.lock, seq)); |
1247 | } | 1273 | } |
1248 | 1274 | ||
1275 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1276 | /** | ||
1277 | * ktime_get_update_offsets - hrtimer helper | ||
1278 | * @offs_real: pointer to storage for monotonic -> realtime offset | ||
1279 | * @offs_boot: pointer to storage for monotonic -> boottime offset | ||
1280 | * | ||
1281 | * Returns current monotonic time and updates the offsets | ||
1282 | * Called from hrtimer_interrupt() or retrigger_next_event() | ||
1283 | */ | ||
1284 | ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) | ||
1285 | { | ||
1286 | ktime_t now; | ||
1287 | unsigned int seq; | ||
1288 | u64 secs, nsecs; | ||
1289 | |||
1290 | do { | ||
1291 | seq = read_seqbegin(&timekeeper.lock); | ||
1292 | |||
1293 | secs = timekeeper.xtime.tv_sec; | ||
1294 | nsecs = timekeeper.xtime.tv_nsec; | ||
1295 | nsecs += timekeeping_get_ns(); | ||
1296 | /* If arch requires, add in gettimeoffset() */ | ||
1297 | nsecs += arch_gettimeoffset(); | ||
1298 | |||
1299 | *offs_real = timekeeper.offs_real; | ||
1300 | *offs_boot = timekeeper.offs_boot; | ||
1301 | } while (read_seqretry(&timekeeper.lock, seq)); | ||
1302 | |||
1303 | now = ktime_add_ns(ktime_set(secs, 0), nsecs); | ||
1304 | now = ktime_sub(now, *offs_real); | ||
1305 | return now; | ||
1306 | } | ||
1307 | #endif | ||
1308 | |||
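The loop above is the stock seqlock read pattern. A minimal userspace model of its read side (C11 atomics; the plain struct copy is a tolerated race only in this model, the kernel's seqlock supplies the real guarantees): an odd sequence means a writer is mid-update, and any change across the reads forces a retry.

#include <stdatomic.h>

struct tk_snap { long secs, nsecs; };

static atomic_uint seq;			/* even: idle, odd: writer active */
static struct tk_snap shared;

static struct tk_snap read_snapshot(void)
{
	struct tk_snap s;
	unsigned int start;

	do {
		while ((start = atomic_load_explicit(&seq,
				memory_order_acquire)) & 1)
			;			/* writer in progress, spin */

		s = shared;			/* speculative copy */

		atomic_thread_fence(memory_order_acquire);
	} while (atomic_load_explicit(&seq, memory_order_relaxed) != start);

	return s;				/* consistent snapshot */
}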
1249 | /** | 1309 | /** |
1250 | * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format | 1310 | * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format |
1251 | */ | 1311 | */ |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 82a3e0c56b1d..49491fa7daa2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -1075,6 +1075,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) | |||
1075 | rb_init_page(bpage->page); | 1075 | rb_init_page(bpage->page); |
1076 | 1076 | ||
1077 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); | 1077 | INIT_LIST_HEAD(&cpu_buffer->reader_page->list); |
1078 | INIT_LIST_HEAD(&cpu_buffer->new_pages); | ||
1078 | 1079 | ||
1079 | ret = rb_allocate_pages(cpu_buffer, nr_pages); | 1080 | ret = rb_allocate_pages(cpu_buffer, nr_pages); |
1080 | if (ret < 0) | 1081 | if (ret < 0) |
@@ -1346,10 +1347,9 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages) | |||
1346 | * If something was added to this page, it was full | 1347 | * If something was added to this page, it was full |
1347 | * since it is not the tail page. So we deduct the | 1348 | * since it is not the tail page. So we deduct the |
1348 | * bytes consumed in the ring buffer from here. | 1349 | * bytes consumed in the ring buffer from here. |
1349 | * No need to update overruns, since this page is | 1350 | * Increment overrun to account for the lost events. |
1350 | * deleted from ring buffer and its entries are | ||
1351 | * already accounted for. | ||
1352 | */ | 1351 | */ |
1352 | local_add(page_entries, &cpu_buffer->overrun); | ||
1353 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); | 1353 | local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); |
1354 | } | 1354 | } |
1355 | 1355 | ||