diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2012-12-16 14:00:34 -0500 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2013-01-27 14:35:47 -0500 |
commit | 6a61671bb2f3a1bd12cd17b8fca811a624782632 (patch) | |
tree | 0afc2915fb7e517472710a49a524510322dd5baa /kernel/sched/cputime.c | |
parent | c11f11fcbdb5be790c565aed46411486a7586afc (diff) |
cputime: Safely read cputime of full dynticks CPUs
While remotely reading the cputime of a task running in a
full dynticks CPU, the values stored in utime/stime fields
of struct task_struct may be stale. Its values may be those
of the last kernel <-> user transition time snapshot and
we need to add the tickless time spent since this snapshot.
To fix this, flush the cputime of the dynticks CPUs on
kernel <-> user transition and record the time / context
where we did this. Then on top of this snapshot and the current
time, perform the fixup on the reader side from task_times()
accessors.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Diffstat (limited to 'kernel/sched/cputime.c')
-rw-r--r-- | kernel/sched/cputime.c | 193 |
1 file changed, 181 insertions, 12 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a44ecdf809a1..082e05d915b4 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev) | |||
492 | * vtime_account(). | 492 | * vtime_account(). |
493 | */ | 493 | */ |
494 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 494 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
495 | void vtime_account(struct task_struct *tsk) | 495 | void vtime_account_irq_enter(struct task_struct *tsk) |
496 | { | 496 | { |
497 | if (!vtime_accounting_enabled()) | 497 | if (!vtime_accounting_enabled()) |
498 | return; | 498 | return; |
@@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk) | |||
516 | } | 516 | } |
517 | vtime_account_system(tsk); | 517 | vtime_account_system(tsk); |
518 | } | 518 | } |
519 | EXPORT_SYMBOL_GPL(vtime_account); | 519 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); |
520 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 520 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
521 | 521 | ||
522 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ | 522 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
@@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime | |||
600 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ | 600 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
601 | 601 | ||
602 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | 602 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
603 | static DEFINE_PER_CPU(unsigned long long, cputime_snap); | 603 | static unsigned long long vtime_delta(struct task_struct *tsk) |
604 | { | ||
605 | unsigned long long clock; | ||
606 | |||
607 | clock = sched_clock(); | ||
608 | if (clock < tsk->vtime_snap) | ||
609 | return 0; | ||
604 | 610 | ||
605 | static cputime_t get_vtime_delta(void) | 611 | return clock - tsk->vtime_snap; |
612 | } | ||
613 | |||
614 | static cputime_t get_vtime_delta(struct task_struct *tsk) | ||
606 | { | 615 | { |
607 | unsigned long long delta; | 616 | unsigned long long delta = vtime_delta(tsk); |
608 | 617 | ||
609 | delta = sched_clock() - __this_cpu_read(cputime_snap); | 618 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); |
610 | __this_cpu_add(cputime_snap, delta); | 619 | tsk->vtime_snap += delta; |
611 | 620 | ||
612 | /* CHECKME: always safe to convert nsecs to cputime? */ | 621 | /* CHECKME: always safe to convert nsecs to cputime? */ |
613 | return nsecs_to_cputime(delta); | 622 | return nsecs_to_cputime(delta); |
614 | } | 623 | } |
615 | 624 | ||
625 | static void __vtime_account_system(struct task_struct *tsk) | ||
626 | { | ||
627 | cputime_t delta_cpu = get_vtime_delta(tsk); | ||
628 | |||
629 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | ||
630 | } | ||
631 | |||
616 | void vtime_account_system(struct task_struct *tsk) | 632 | void vtime_account_system(struct task_struct *tsk) |
617 | { | 633 | { |
618 | cputime_t delta_cpu; | 634 | if (!vtime_accounting_enabled()) |
635 | return; | ||
636 | |||
637 | write_seqlock(&tsk->vtime_seqlock); | ||
638 | __vtime_account_system(tsk); | ||
639 | write_sequnlock(&tsk->vtime_seqlock); | ||
640 | } | ||
619 | 641 | ||
642 | void vtime_account_irq_exit(struct task_struct *tsk) | ||
643 | { | ||
620 | if (!vtime_accounting_enabled()) | 644 | if (!vtime_accounting_enabled()) |
621 | return; | 645 | return; |
622 | 646 | ||
623 | delta_cpu = get_vtime_delta(); | 647 | write_seqlock(&tsk->vtime_seqlock); |
624 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | 648 | if (context_tracking_in_user()) |
649 | tsk->vtime_snap_whence = VTIME_USER; | ||
650 | __vtime_account_system(tsk); | ||
651 | write_sequnlock(&tsk->vtime_seqlock); | ||
625 | } | 652 | } |
626 | 653 | ||
627 | void vtime_account_user(struct task_struct *tsk) | 654 | void vtime_account_user(struct task_struct *tsk) |
@@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk) | |||
631 | if (!vtime_accounting_enabled()) | 658 | if (!vtime_accounting_enabled()) |
632 | return; | 659 | return; |
633 | 660 | ||
634 | delta_cpu = get_vtime_delta(); | 661 | delta_cpu = get_vtime_delta(tsk); |
635 | 662 | ||
663 | write_seqlock(&tsk->vtime_seqlock); | ||
664 | tsk->vtime_snap_whence = VTIME_SYS; | ||
636 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | 665 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); |
666 | write_sequnlock(&tsk->vtime_seqlock); | ||
667 | } | ||
668 | |||
669 | void vtime_user_enter(struct task_struct *tsk) | ||
670 | { | ||
671 | if (!vtime_accounting_enabled()) | ||
672 | return; | ||
673 | |||
674 | write_seqlock(&tsk->vtime_seqlock); | ||
675 | tsk->vtime_snap_whence = VTIME_USER; | ||
676 | __vtime_account_system(tsk); | ||
677 | write_sequnlock(&tsk->vtime_seqlock); | ||
678 | } | ||
679 | |||
680 | void vtime_guest_enter(struct task_struct *tsk) | ||
681 | { | ||
682 | write_seqlock(&tsk->vtime_seqlock); | ||
683 | __vtime_account_system(tsk); | ||
684 | current->flags |= PF_VCPU; | ||
685 | write_sequnlock(&tsk->vtime_seqlock); | ||
686 | } | ||
687 | |||
688 | void vtime_guest_exit(struct task_struct *tsk) | ||
689 | { | ||
690 | write_seqlock(&tsk->vtime_seqlock); | ||
691 | __vtime_account_system(tsk); | ||
692 | current->flags &= ~PF_VCPU; | ||
693 | write_sequnlock(&tsk->vtime_seqlock); | ||
637 | } | 694 | } |
638 | 695 | ||
639 | void vtime_account_idle(struct task_struct *tsk) | 696 | void vtime_account_idle(struct task_struct *tsk) |
640 | { | 697 | { |
641 | cputime_t delta_cpu = get_vtime_delta(); | 698 | cputime_t delta_cpu = get_vtime_delta(tsk); |
642 | 699 | ||
643 | account_idle_time(delta_cpu); | 700 | account_idle_time(delta_cpu); |
644 | } | 701 | } |
@@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void) | |||
647 | { | 704 | { |
648 | return context_tracking_active(); | 705 | return context_tracking_active(); |
649 | } | 706 | } |
707 | |||
708 | void arch_vtime_task_switch(struct task_struct *prev) | ||
709 | { | ||
710 | write_seqlock(&prev->vtime_seqlock); | ||
711 | prev->vtime_snap_whence = VTIME_SLEEPING; | ||
712 | write_sequnlock(&prev->vtime_seqlock); | ||
713 | |||
714 | write_seqlock(¤t->vtime_seqlock); | ||
715 | current->vtime_snap_whence = VTIME_SYS; | ||
716 | current->vtime_snap = sched_clock(); | ||
717 | write_sequnlock(¤t->vtime_seqlock); | ||
718 | } | ||
719 | |||
720 | void vtime_init_idle(struct task_struct *t) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | |||
724 | write_seqlock_irqsave(&t->vtime_seqlock, flags); | ||
725 | t->vtime_snap_whence = VTIME_SYS; | ||
726 | t->vtime_snap = sched_clock(); | ||
727 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); | ||
728 | } | ||
729 | |||
730 | cputime_t task_gtime(struct task_struct *t) | ||
731 | { | ||
732 | unsigned long flags; | ||
733 | unsigned int seq; | ||
734 | cputime_t gtime; | ||
735 | |||
736 | do { | ||
737 | seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); | ||
738 | |||
739 | gtime = t->gtime; | ||
740 | if (t->flags & PF_VCPU) | ||
741 | gtime += vtime_delta(t); | ||
742 | |||
743 | } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); | ||
744 | |||
745 | return gtime; | ||
746 | } | ||
747 | |||
748 | /* | ||
749 | * Fetch cputime raw values from fields of task_struct and | ||
750 | * add up the pending nohz execution time since the last | ||
751 | * cputime snapshot. | ||
752 | */ | ||
753 | static void | ||
754 | fetch_task_cputime(struct task_struct *t, | ||
755 | cputime_t *u_dst, cputime_t *s_dst, | ||
756 | cputime_t *u_src, cputime_t *s_src, | ||
757 | cputime_t *udelta, cputime_t *sdelta) | ||
758 | { | ||
759 | unsigned long flags; | ||
760 | unsigned int seq; | ||
761 | unsigned long long delta; | ||
762 | |||
763 | do { | ||
764 | *udelta = 0; | ||
765 | *sdelta = 0; | ||
766 | |||
767 | seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); | ||
768 | |||
769 | if (u_dst) | ||
770 | *u_dst = *u_src; | ||
771 | if (s_dst) | ||
772 | *s_dst = *s_src; | ||
773 | |||
774 | /* Task is sleeping, nothing to add */ | ||
775 | if (t->vtime_snap_whence == VTIME_SLEEPING || | ||
776 | is_idle_task(t)) | ||
777 | continue; | ||
778 | |||
779 | delta = vtime_delta(t); | ||
780 | |||
781 | /* | ||
782 | * Task runs either in user or kernel space, add pending nohz time to | ||
783 | * the right place. | ||
784 | */ | ||
785 | if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { | ||
786 | *udelta = delta; | ||
787 | } else { | ||
788 | if (t->vtime_snap_whence == VTIME_SYS) | ||
789 | *sdelta = delta; | ||
790 | } | ||
791 | } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); | ||
792 | } | ||
793 | |||
794 | |||
795 | void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) | ||
796 | { | ||
797 | cputime_t udelta, sdelta; | ||
798 | |||
799 | fetch_task_cputime(t, utime, stime, &t->utime, | ||
800 | &t->stime, &udelta, &sdelta); | ||
801 | if (utime) | ||
802 | *utime += udelta; | ||
803 | if (stime) | ||
804 | *stime += sdelta; | ||
805 | } | ||
806 | |||
807 | void task_cputime_scaled(struct task_struct *t, | ||
808 | cputime_t *utimescaled, cputime_t *stimescaled) | ||
809 | { | ||
810 | cputime_t udelta, sdelta; | ||
811 | |||
812 | fetch_task_cputime(t, utimescaled, stimescaled, | ||
813 | &t->utimescaled, &t->stimescaled, &udelta, &sdelta); | ||
814 | if (utimescaled) | ||
815 | *utimescaled += cputime_to_scaled(udelta); | ||
816 | if (stimescaled) | ||
817 | *stimescaled += cputime_to_scaled(sdelta); | ||
818 | } | ||
650 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ | 819 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |