diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2012-12-16 14:00:34 -0500 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2013-01-27 14:35:47 -0500 |
commit | 6a61671bb2f3a1bd12cd17b8fca811a624782632 (patch) | |
tree | 0afc2915fb7e517472710a49a524510322dd5baa /kernel | |
parent | c11f11fcbdb5be790c565aed46411486a7586afc (diff) |
cputime: Safely read cputime of full dynticks CPUs
While remotely reading the cputime of a task running in a
full dynticks CPU, the values stored in utime/stime fields
of struct task_struct may be stale. These values may be those
of the last kernel <-> user transition time snapshot and
we need to add the tickless time spent since this snapshot.
To fix this, flush the cputime of the dynticks CPUs on
kernel <-> user transition and record the time / context
where we did this. Then on top of this snapshot and the current
time, perform the fixup on the reader side from task_times()
accessors.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/context_tracking.c | 21 | ||||
-rw-r--r-- | kernel/fork.c | 6 | ||||
-rw-r--r-- | kernel/sched/core.c | 1 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 193 | ||||
-rw-r--r-- | kernel/softirq.c | 6 |
5 files changed, 211 insertions, 16 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 9002e92e6372..74f68f4dc6c2 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
@@ -1,8 +1,9 @@ | |||
1 | #include <linux/context_tracking.h> | 1 | #include <linux/context_tracking.h> |
2 | #include <linux/kvm_host.h> | ||
2 | #include <linux/rcupdate.h> | 3 | #include <linux/rcupdate.h> |
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
5 | 6 | #include <linux/export.h> | |
6 | 7 | ||
7 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { | 8 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { |
8 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 9 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE |
@@ -61,6 +62,24 @@ void user_exit(void) | |||
61 | local_irq_restore(flags); | 62 | local_irq_restore(flags); |
62 | } | 63 | } |
63 | 64 | ||
65 | void guest_enter(void) | ||
66 | { | ||
67 | if (vtime_accounting_enabled()) | ||
68 | vtime_guest_enter(current); | ||
69 | else | ||
70 | __guest_enter(); | ||
71 | } | ||
72 | EXPORT_SYMBOL_GPL(guest_enter); | ||
73 | |||
74 | void guest_exit(void) | ||
75 | { | ||
76 | if (vtime_accounting_enabled()) | ||
77 | vtime_guest_exit(current); | ||
78 | else | ||
79 | __guest_exit(); | ||
80 | } | ||
81 | EXPORT_SYMBOL_GPL(guest_exit); | ||
82 | |||
64 | void context_tracking_task_switch(struct task_struct *prev, | 83 | void context_tracking_task_switch(struct task_struct *prev, |
65 | struct task_struct *next) | 84 | struct task_struct *next) |
66 | { | 85 | { |
diff --git a/kernel/fork.c b/kernel/fork.c index 65ca6d27f24e..e68a95b4cf26 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1233 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 1233 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
1234 | p->prev_cputime.utime = p->prev_cputime.stime = 0; | 1234 | p->prev_cputime.utime = p->prev_cputime.stime = 0; |
1235 | #endif | 1235 | #endif |
1236 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | ||
1237 | seqlock_init(&p->vtime_seqlock); | ||
1238 | p->vtime_snap = 0; | ||
1239 | p->vtime_snap_whence = VTIME_SLEEPING; | ||
1240 | #endif | ||
1241 | |||
1236 | #if defined(SPLIT_RSS_COUNTING) | 1242 | #if defined(SPLIT_RSS_COUNTING) |
1237 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); | 1243 | memset(&p->rss_stat, 0, sizeof(p->rss_stat)); |
1238 | #endif | 1244 | #endif |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..261022d7e79d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
4666 | */ | 4666 | */ |
4667 | idle->sched_class = &idle_sched_class; | 4667 | idle->sched_class = &idle_sched_class; |
4668 | ftrace_graph_init_idle_task(idle, cpu); | 4668 | ftrace_graph_init_idle_task(idle, cpu); |
4669 | vtime_init_idle(idle); | ||
4669 | #if defined(CONFIG_SMP) | 4670 | #if defined(CONFIG_SMP) |
4670 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); | 4671 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); |
4671 | #endif | 4672 | #endif |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a44ecdf809a1..082e05d915b4 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev) | |||
492 | * vtime_account(). | 492 | * vtime_account(). |
493 | */ | 493 | */ |
494 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 494 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
495 | void vtime_account(struct task_struct *tsk) | 495 | void vtime_account_irq_enter(struct task_struct *tsk) |
496 | { | 496 | { |
497 | if (!vtime_accounting_enabled()) | 497 | if (!vtime_accounting_enabled()) |
498 | return; | 498 | return; |
@@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk) | |||
516 | } | 516 | } |
517 | vtime_account_system(tsk); | 517 | vtime_account_system(tsk); |
518 | } | 518 | } |
519 | EXPORT_SYMBOL_GPL(vtime_account); | 519 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); |
520 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 520 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
521 | 521 | ||
522 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ | 522 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
@@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime | |||
600 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ | 600 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
601 | 601 | ||
602 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | 602 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
603 | static DEFINE_PER_CPU(unsigned long long, cputime_snap); | 603 | static unsigned long long vtime_delta(struct task_struct *tsk) |
604 | { | ||
605 | unsigned long long clock; | ||
606 | |||
607 | clock = sched_clock(); | ||
608 | if (clock < tsk->vtime_snap) | ||
609 | return 0; | ||
604 | 610 | ||
605 | static cputime_t get_vtime_delta(void) | 611 | return clock - tsk->vtime_snap; |
612 | } | ||
613 | |||
614 | static cputime_t get_vtime_delta(struct task_struct *tsk) | ||
606 | { | 615 | { |
607 | unsigned long long delta; | 616 | unsigned long long delta = vtime_delta(tsk); |
608 | 617 | ||
609 | delta = sched_clock() - __this_cpu_read(cputime_snap); | 618 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); |
610 | __this_cpu_add(cputime_snap, delta); | 619 | tsk->vtime_snap += delta; |
611 | 620 | ||
612 | /* CHECKME: always safe to convert nsecs to cputime? */ | 621 | /* CHECKME: always safe to convert nsecs to cputime? */ |
613 | return nsecs_to_cputime(delta); | 622 | return nsecs_to_cputime(delta); |
614 | } | 623 | } |
615 | 624 | ||
625 | static void __vtime_account_system(struct task_struct *tsk) | ||
626 | { | ||
627 | cputime_t delta_cpu = get_vtime_delta(tsk); | ||
628 | |||
629 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | ||
630 | } | ||
631 | |||
616 | void vtime_account_system(struct task_struct *tsk) | 632 | void vtime_account_system(struct task_struct *tsk) |
617 | { | 633 | { |
618 | cputime_t delta_cpu; | 634 | if (!vtime_accounting_enabled()) |
635 | return; | ||
636 | |||
637 | write_seqlock(&tsk->vtime_seqlock); | ||
638 | __vtime_account_system(tsk); | ||
639 | write_sequnlock(&tsk->vtime_seqlock); | ||
640 | } | ||
619 | 641 | ||
642 | void vtime_account_irq_exit(struct task_struct *tsk) | ||
643 | { | ||
620 | if (!vtime_accounting_enabled()) | 644 | if (!vtime_accounting_enabled()) |
621 | return; | 645 | return; |
622 | 646 | ||
623 | delta_cpu = get_vtime_delta(); | 647 | write_seqlock(&tsk->vtime_seqlock); |
624 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | 648 | if (context_tracking_in_user()) |
649 | tsk->vtime_snap_whence = VTIME_USER; | ||
650 | __vtime_account_system(tsk); | ||
651 | write_sequnlock(&tsk->vtime_seqlock); | ||
625 | } | 652 | } |
626 | 653 | ||
627 | void vtime_account_user(struct task_struct *tsk) | 654 | void vtime_account_user(struct task_struct *tsk) |
@@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk) | |||
631 | if (!vtime_accounting_enabled()) | 658 | if (!vtime_accounting_enabled()) |
632 | return; | 659 | return; |
633 | 660 | ||
634 | delta_cpu = get_vtime_delta(); | 661 | delta_cpu = get_vtime_delta(tsk); |
635 | 662 | ||
663 | write_seqlock(&tsk->vtime_seqlock); | ||
664 | tsk->vtime_snap_whence = VTIME_SYS; | ||
636 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | 665 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); |
666 | write_sequnlock(&tsk->vtime_seqlock); | ||
667 | } | ||
668 | |||
669 | void vtime_user_enter(struct task_struct *tsk) | ||
670 | { | ||
671 | if (!vtime_accounting_enabled()) | ||
672 | return; | ||
673 | |||
674 | write_seqlock(&tsk->vtime_seqlock); | ||
675 | tsk->vtime_snap_whence = VTIME_USER; | ||
676 | __vtime_account_system(tsk); | ||
677 | write_sequnlock(&tsk->vtime_seqlock); | ||
678 | } | ||
679 | |||
680 | void vtime_guest_enter(struct task_struct *tsk) | ||
681 | { | ||
682 | write_seqlock(&tsk->vtime_seqlock); | ||
683 | __vtime_account_system(tsk); | ||
684 | current->flags |= PF_VCPU; | ||
685 | write_sequnlock(&tsk->vtime_seqlock); | ||
686 | } | ||
687 | |||
688 | void vtime_guest_exit(struct task_struct *tsk) | ||
689 | { | ||
690 | write_seqlock(&tsk->vtime_seqlock); | ||
691 | __vtime_account_system(tsk); | ||
692 | current->flags &= ~PF_VCPU; | ||
693 | write_sequnlock(&tsk->vtime_seqlock); | ||
637 | } | 694 | } |
638 | 695 | ||
639 | void vtime_account_idle(struct task_struct *tsk) | 696 | void vtime_account_idle(struct task_struct *tsk) |
640 | { | 697 | { |
641 | cputime_t delta_cpu = get_vtime_delta(); | 698 | cputime_t delta_cpu = get_vtime_delta(tsk); |
642 | 699 | ||
643 | account_idle_time(delta_cpu); | 700 | account_idle_time(delta_cpu); |
644 | } | 701 | } |
@@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void) | |||
647 | { | 704 | { |
648 | return context_tracking_active(); | 705 | return context_tracking_active(); |
649 | } | 706 | } |
707 | |||
708 | void arch_vtime_task_switch(struct task_struct *prev) | ||
709 | { | ||
710 | write_seqlock(&prev->vtime_seqlock); | ||
711 | prev->vtime_snap_whence = VTIME_SLEEPING; | ||
712 | write_sequnlock(&prev->vtime_seqlock); | ||
713 | |||
714 | write_seqlock(¤t->vtime_seqlock); | ||
715 | current->vtime_snap_whence = VTIME_SYS; | ||
716 | current->vtime_snap = sched_clock(); | ||
717 | write_sequnlock(¤t->vtime_seqlock); | ||
718 | } | ||
719 | |||
720 | void vtime_init_idle(struct task_struct *t) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | |||
724 | write_seqlock_irqsave(&t->vtime_seqlock, flags); | ||
725 | t->vtime_snap_whence = VTIME_SYS; | ||
726 | t->vtime_snap = sched_clock(); | ||
727 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); | ||
728 | } | ||
729 | |||
730 | cputime_t task_gtime(struct task_struct *t) | ||
731 | { | ||
732 | unsigned long flags; | ||
733 | unsigned int seq; | ||
734 | cputime_t gtime; | ||
735 | |||
736 | do { | ||
737 | seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); | ||
738 | |||
739 | gtime = t->gtime; | ||
740 | if (t->flags & PF_VCPU) | ||
741 | gtime += vtime_delta(t); | ||
742 | |||
743 | } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); | ||
744 | |||
745 | return gtime; | ||
746 | } | ||
747 | |||
748 | /* | ||
749 | * Fetch cputime raw values from fields of task_struct and | ||
750 | * add up the pending nohz execution time since the last | ||
751 | * cputime snapshot. | ||
752 | */ | ||
753 | static void | ||
754 | fetch_task_cputime(struct task_struct *t, | ||
755 | cputime_t *u_dst, cputime_t *s_dst, | ||
756 | cputime_t *u_src, cputime_t *s_src, | ||
757 | cputime_t *udelta, cputime_t *sdelta) | ||
758 | { | ||
759 | unsigned long flags; | ||
760 | unsigned int seq; | ||
761 | unsigned long long delta; | ||
762 | |||
763 | do { | ||
764 | *udelta = 0; | ||
765 | *sdelta = 0; | ||
766 | |||
767 | seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); | ||
768 | |||
769 | if (u_dst) | ||
770 | *u_dst = *u_src; | ||
771 | if (s_dst) | ||
772 | *s_dst = *s_src; | ||
773 | |||
774 | /* Task is sleeping, nothing to add */ | ||
775 | if (t->vtime_snap_whence == VTIME_SLEEPING || | ||
776 | is_idle_task(t)) | ||
777 | continue; | ||
778 | |||
779 | delta = vtime_delta(t); | ||
780 | |||
781 | /* | ||
782 | * Task runs either in user or kernel space, add pending nohz time to | ||
783 | * the right place. | ||
784 | */ | ||
785 | if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { | ||
786 | *udelta = delta; | ||
787 | } else { | ||
788 | if (t->vtime_snap_whence == VTIME_SYS) | ||
789 | *sdelta = delta; | ||
790 | } | ||
791 | } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); | ||
792 | } | ||
793 | |||
794 | |||
795 | void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) | ||
796 | { | ||
797 | cputime_t udelta, sdelta; | ||
798 | |||
799 | fetch_task_cputime(t, utime, stime, &t->utime, | ||
800 | &t->stime, &udelta, &sdelta); | ||
801 | if (utime) | ||
802 | *utime += udelta; | ||
803 | if (stime) | ||
804 | *stime += sdelta; | ||
805 | } | ||
806 | |||
807 | void task_cputime_scaled(struct task_struct *t, | ||
808 | cputime_t *utimescaled, cputime_t *stimescaled) | ||
809 | { | ||
810 | cputime_t udelta, sdelta; | ||
811 | |||
812 | fetch_task_cputime(t, utimescaled, stimescaled, | ||
813 | &t->utimescaled, &t->stimescaled, &udelta, &sdelta); | ||
814 | if (utimescaled) | ||
815 | *utimescaled += cputime_to_scaled(udelta); | ||
816 | if (stimescaled) | ||
817 | *stimescaled += cputime_to_scaled(sdelta); | ||
818 | } | ||
650 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ | 819 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |
diff --git a/kernel/softirq.c b/kernel/softirq.c index ed567babe789..f5cc25f147a6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) | |||
221 | current->flags &= ~PF_MEMALLOC; | 221 | current->flags &= ~PF_MEMALLOC; |
222 | 222 | ||
223 | pending = local_softirq_pending(); | 223 | pending = local_softirq_pending(); |
224 | vtime_account_irq_enter(current); | 224 | account_irq_enter_time(current); |
225 | 225 | ||
226 | __local_bh_disable((unsigned long)__builtin_return_address(0), | 226 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
227 | SOFTIRQ_OFFSET); | 227 | SOFTIRQ_OFFSET); |
@@ -272,7 +272,7 @@ restart: | |||
272 | 272 | ||
273 | lockdep_softirq_exit(); | 273 | lockdep_softirq_exit(); |
274 | 274 | ||
275 | vtime_account_irq_exit(current); | 275 | account_irq_exit_time(current); |
276 | __local_bh_enable(SOFTIRQ_OFFSET); | 276 | __local_bh_enable(SOFTIRQ_OFFSET); |
277 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | 277 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); |
278 | } | 278 | } |
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void) | |||
341 | */ | 341 | */ |
342 | void irq_exit(void) | 342 | void irq_exit(void) |
343 | { | 343 | { |
344 | vtime_account_irq_exit(current); | 344 | account_irq_exit_time(current); |
345 | trace_hardirq_exit(); | 345 | trace_hardirq_exit(); |
346 | sub_preempt_count(IRQ_EXIT_OFFSET); | 346 | sub_preempt_count(IRQ_EXIT_OFFSET); |
347 | if (!in_interrupt() && local_softirq_pending()) | 347 | if (!in_interrupt() && local_softirq_pending()) |