aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched/cputime.c
diff options
context:
space:
mode:
author: Frederic Weisbecker <fweisbec@gmail.com> 2012-12-16 14:00:34 -0500
committer: Frederic Weisbecker <fweisbec@gmail.com> 2013-01-27 14:35:47 -0500
commit: 6a61671bb2f3a1bd12cd17b8fca811a624782632 (patch)
tree: 0afc2915fb7e517472710a49a524510322dd5baa /kernel/sched/cputime.c
parent: c11f11fcbdb5be790c565aed46411486a7586afc (diff)
cputime: Safely read cputime of full dynticks CPUs
While remotely reading the cputime of a task running in a full dynticks CPU, the values stored in utime/stime fields of struct task_struct may be stale. Its values may be those of the last kernel <-> user transition time snapshot and we need to add the tickless time spent since this snapshot.

To fix this, flush the cputime of the dynticks CPUs on kernel <-> user transition and record the time / context where we did this. Then on top of this snapshot and the current time, perform the fixup on the reader side from task_times() accessors.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Diffstat (limited to 'kernel/sched/cputime.c')
-rw-r--r--kernel/sched/cputime.c193
1 files changed, 181 insertions, 12 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a44ecdf809a1..082e05d915b4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev)
492 * vtime_account(). 492 * vtime_account().
493 */ 493 */
494#ifndef __ARCH_HAS_VTIME_ACCOUNT 494#ifndef __ARCH_HAS_VTIME_ACCOUNT
495void vtime_account(struct task_struct *tsk) 495void vtime_account_irq_enter(struct task_struct *tsk)
496{ 496{
497 if (!vtime_accounting_enabled()) 497 if (!vtime_accounting_enabled())
498 return; 498 return;
@@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk)
516 } 516 }
517 vtime_account_system(tsk); 517 vtime_account_system(tsk);
518} 518}
519EXPORT_SYMBOL_GPL(vtime_account); 519EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
520#endif /* __ARCH_HAS_VTIME_ACCOUNT */ 520#endif /* __ARCH_HAS_VTIME_ACCOUNT */
521 521
522#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ 522#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
600#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ 600#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
601 601
602#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 602#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
603static DEFINE_PER_CPU(unsigned long long, cputime_snap); 603static unsigned long long vtime_delta(struct task_struct *tsk)
604{
605 unsigned long long clock;
606
607 clock = sched_clock();
608 if (clock < tsk->vtime_snap)
609 return 0;
604 610
605static cputime_t get_vtime_delta(void) 611 return clock - tsk->vtime_snap;
612}
613
614static cputime_t get_vtime_delta(struct task_struct *tsk)
606{ 615{
607 unsigned long long delta; 616 unsigned long long delta = vtime_delta(tsk);
608 617
609 delta = sched_clock() - __this_cpu_read(cputime_snap); 618 WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
610 __this_cpu_add(cputime_snap, delta); 619 tsk->vtime_snap += delta;
611 620
612 /* CHECKME: always safe to convert nsecs to cputime? */ 621 /* CHECKME: always safe to convert nsecs to cputime? */
613 return nsecs_to_cputime(delta); 622 return nsecs_to_cputime(delta);
614} 623}
615 624
625static void __vtime_account_system(struct task_struct *tsk)
626{
627 cputime_t delta_cpu = get_vtime_delta(tsk);
628
629 account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
630}
631
616void vtime_account_system(struct task_struct *tsk) 632void vtime_account_system(struct task_struct *tsk)
617{ 633{
618 cputime_t delta_cpu; 634 if (!vtime_accounting_enabled())
635 return;
636
637 write_seqlock(&tsk->vtime_seqlock);
638 __vtime_account_system(tsk);
639 write_sequnlock(&tsk->vtime_seqlock);
640}
619 641
642void vtime_account_irq_exit(struct task_struct *tsk)
643{
620 if (!vtime_accounting_enabled()) 644 if (!vtime_accounting_enabled())
621 return; 645 return;
622 646
623 delta_cpu = get_vtime_delta(); 647 write_seqlock(&tsk->vtime_seqlock);
624 account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); 648 if (context_tracking_in_user())
649 tsk->vtime_snap_whence = VTIME_USER;
650 __vtime_account_system(tsk);
651 write_sequnlock(&tsk->vtime_seqlock);
625} 652}
626 653
627void vtime_account_user(struct task_struct *tsk) 654void vtime_account_user(struct task_struct *tsk)
@@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk)
631 if (!vtime_accounting_enabled()) 658 if (!vtime_accounting_enabled())
632 return; 659 return;
633 660
634 delta_cpu = get_vtime_delta(); 661 delta_cpu = get_vtime_delta(tsk);
635 662
663 write_seqlock(&tsk->vtime_seqlock);
664 tsk->vtime_snap_whence = VTIME_SYS;
636 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); 665 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
666 write_sequnlock(&tsk->vtime_seqlock);
667}
668
669void vtime_user_enter(struct task_struct *tsk)
670{
671 if (!vtime_accounting_enabled())
672 return;
673
674 write_seqlock(&tsk->vtime_seqlock);
675 tsk->vtime_snap_whence = VTIME_USER;
676 __vtime_account_system(tsk);
677 write_sequnlock(&tsk->vtime_seqlock);
678}
679
680void vtime_guest_enter(struct task_struct *tsk)
681{
682 write_seqlock(&tsk->vtime_seqlock);
683 __vtime_account_system(tsk);
684 current->flags |= PF_VCPU;
685 write_sequnlock(&tsk->vtime_seqlock);
686}
687
688void vtime_guest_exit(struct task_struct *tsk)
689{
690 write_seqlock(&tsk->vtime_seqlock);
691 __vtime_account_system(tsk);
692 current->flags &= ~PF_VCPU;
693 write_sequnlock(&tsk->vtime_seqlock);
637} 694}
638 695
639void vtime_account_idle(struct task_struct *tsk) 696void vtime_account_idle(struct task_struct *tsk)
640{ 697{
641 cputime_t delta_cpu = get_vtime_delta(); 698 cputime_t delta_cpu = get_vtime_delta(tsk);
642 699
643 account_idle_time(delta_cpu); 700 account_idle_time(delta_cpu);
644} 701}
@@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void)
647{ 704{
648 return context_tracking_active(); 705 return context_tracking_active();
649} 706}
707
708void arch_vtime_task_switch(struct task_struct *prev)
709{
710 write_seqlock(&prev->vtime_seqlock);
711 prev->vtime_snap_whence = VTIME_SLEEPING;
712 write_sequnlock(&prev->vtime_seqlock);
713
714 write_seqlock(&current->vtime_seqlock);
715 current->vtime_snap_whence = VTIME_SYS;
716 current->vtime_snap = sched_clock();
717 write_sequnlock(&current->vtime_seqlock);
718}
719
720void vtime_init_idle(struct task_struct *t)
721{
722 unsigned long flags;
723
724 write_seqlock_irqsave(&t->vtime_seqlock, flags);
725 t->vtime_snap_whence = VTIME_SYS;
726 t->vtime_snap = sched_clock();
727 write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
728}
729
730cputime_t task_gtime(struct task_struct *t)
731{
732 unsigned long flags;
733 unsigned int seq;
734 cputime_t gtime;
735
736 do {
737 seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
738
739 gtime = t->gtime;
740 if (t->flags & PF_VCPU)
741 gtime += vtime_delta(t);
742
743 } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
744
745 return gtime;
746}
747
748/*
749 * Fetch cputime raw values from fields of task_struct and
750 * add up the pending nohz execution time since the last
751 * cputime snapshot.
752 */
753static void
754fetch_task_cputime(struct task_struct *t,
755 cputime_t *u_dst, cputime_t *s_dst,
756 cputime_t *u_src, cputime_t *s_src,
757 cputime_t *udelta, cputime_t *sdelta)
758{
759 unsigned long flags;
760 unsigned int seq;
761 unsigned long long delta;
762
763 do {
764 *udelta = 0;
765 *sdelta = 0;
766
767 seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
768
769 if (u_dst)
770 *u_dst = *u_src;
771 if (s_dst)
772 *s_dst = *s_src;
773
774 /* Task is sleeping, nothing to add */
775 if (t->vtime_snap_whence == VTIME_SLEEPING ||
776 is_idle_task(t))
777 continue;
778
779 delta = vtime_delta(t);
780
781 /*
782 * Task runs either in user or kernel space, add pending nohz time to
783 * the right place.
784 */
785 if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
786 *udelta = delta;
787 } else {
788 if (t->vtime_snap_whence == VTIME_SYS)
789 *sdelta = delta;
790 }
791 } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
792}
793
794
795void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
796{
797 cputime_t udelta, sdelta;
798
799 fetch_task_cputime(t, utime, stime, &t->utime,
800 &t->stime, &udelta, &sdelta);
801 if (utime)
802 *utime += udelta;
803 if (stime)
804 *stime += sdelta;
805}
806
807void task_cputime_scaled(struct task_struct *t,
808 cputime_t *utimescaled, cputime_t *stimescaled)
809{
810 cputime_t udelta, sdelta;
811
812 fetch_task_cputime(t, utimescaled, stimescaled,
813 &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
814 if (utimescaled)
815 *utimescaled += cputime_to_scaled(udelta);
816 if (stimescaled)
817 *stimescaled += cputime_to_scaled(sdelta);
818}
650#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ 819#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */