aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul Turner <pjt@google.com>2012-10-04 07:18:31 -0400
committerIngo Molnar <mingo@kernel.org>2012-10-24 04:27:28 -0400
commit82958366cfea1a50e7e90907b2d55ae29ed69974 (patch)
treeffd8ed3800e17ccc3f051e1ce6c8b25dde7bf906 /kernel
parentf1b17280efbd21873d1db8631117bdbccbcb39a2 (diff)
sched: Replace update_shares weight distribution with per-entity computation
Now that the machinery is in place to compute contributed load in a bottom-up fashion, replace the shares distribution code within update_shares() accordingly. Signed-off-by: Paul Turner <pjt@google.com> Reviewed-by: Ben Segall <bsegall@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20120823141507.061208672@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/debug.c8
-rw-r--r--kernel/sched/fair.c157
-rw-r--r--kernel/sched/sched.h36
3 files changed, 36 insertions, 165 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 71b0ea325e93..2cd3c1b4e582 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -218,14 +218,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
218 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); 218 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
219#ifdef CONFIG_FAIR_GROUP_SCHED 219#ifdef CONFIG_FAIR_GROUP_SCHED
220#ifdef CONFIG_SMP 220#ifdef CONFIG_SMP
221 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
222 SPLIT_NS(cfs_rq->load_avg));
223 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
224 SPLIT_NS(cfs_rq->load_period));
225 SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
226 cfs_rq->load_contribution);
227 SEQ_printf(m, " .%-30s: %d\n", "load_tg",
228 atomic_read(&cfs_rq->tg->load_weight));
229 SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg", 221 SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg",
230 cfs_rq->runnable_load_avg); 222 cfs_rq->runnable_load_avg);
231 SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", 223 SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 873c9f5c5796..57fae95eed99 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -658,9 +658,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
658 return calc_delta_fair(sched_slice(cfs_rq, se), se); 658 return calc_delta_fair(sched_slice(cfs_rq, se), se);
659} 659}
660 660
661static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
662static void update_cfs_shares(struct cfs_rq *cfs_rq);
663
664/* 661/*
665 * Update the current task's runtime statistics. Skip current tasks that 662 * Update the current task's runtime statistics. Skip current tasks that
666 * are not in our scheduling class. 663 * are not in our scheduling class.
@@ -680,10 +677,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
680 677
681 curr->vruntime += delta_exec_weighted; 678 curr->vruntime += delta_exec_weighted;
682 update_min_vruntime(cfs_rq); 679 update_min_vruntime(cfs_rq);
683
684#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
685 cfs_rq->load_unacc_exec_time += delta_exec;
686#endif
687} 680}
688 681
689static void update_curr(struct cfs_rq *cfs_rq) 682static void update_curr(struct cfs_rq *cfs_rq)
@@ -806,72 +799,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
806} 799}
807 800
808#ifdef CONFIG_FAIR_GROUP_SCHED 801#ifdef CONFIG_FAIR_GROUP_SCHED
809/* we need this in update_cfs_load and load-balance functions below */
810static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
811# ifdef CONFIG_SMP 802# ifdef CONFIG_SMP
812static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
813 int global_update)
814{
815 struct task_group *tg = cfs_rq->tg;
816 long load_avg;
817
818 load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
819 load_avg -= cfs_rq->load_contribution;
820
821 if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
822 atomic_add(load_avg, &tg->load_weight);
823 cfs_rq->load_contribution += load_avg;
824 }
825}
826
827static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
828{
829 u64 period = sysctl_sched_shares_window;
830 u64 now, delta;
831 unsigned long load = cfs_rq->load.weight;
832
833 if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
834 return;
835
836 now = rq_of(cfs_rq)->clock_task;
837 delta = now - cfs_rq->load_stamp;
838
839 /* truncate load history at 4 idle periods */
840 if (cfs_rq->load_stamp > cfs_rq->load_last &&
841 now - cfs_rq->load_last > 4 * period) {
842 cfs_rq->load_period = 0;
843 cfs_rq->load_avg = 0;
844 delta = period - 1;
845 }
846
847 cfs_rq->load_stamp = now;
848 cfs_rq->load_unacc_exec_time = 0;
849 cfs_rq->load_period += delta;
850 if (load) {
851 cfs_rq->load_last = now;
852 cfs_rq->load_avg += delta * load;
853 }
854
855 /* consider updating load contribution on each fold or truncate */
856 if (global_update || cfs_rq->load_period > period
857 || !cfs_rq->load_period)
858 update_cfs_rq_load_contribution(cfs_rq, global_update);
859
860 while (cfs_rq->load_period > period) {
861 /*
862 * Inline assembly required to prevent the compiler
863 * optimising this loop into a divmod call.
864 * See __iter_div_u64_rem() for another example of this.
865 */
866 asm("" : "+rm" (cfs_rq->load_period));
867 cfs_rq->load_period /= 2;
868 cfs_rq->load_avg /= 2;
869 }
870
871 if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
872 list_del_leaf_cfs_rq(cfs_rq);
873}
874
875static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) 803static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
876{ 804{
877 long tg_weight; 805 long tg_weight;
@@ -881,8 +809,8 @@ static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
881 * to gain a more accurate current total weight. See 809 * to gain a more accurate current total weight. See
882 * update_cfs_rq_load_contribution(). 810 * update_cfs_rq_load_contribution().
883 */ 811 */
884 tg_weight = atomic_read(&tg->load_weight); 812 tg_weight = atomic64_read(&tg->load_avg);
885 tg_weight -= cfs_rq->load_contribution; 813 tg_weight -= cfs_rq->tg_load_contrib;
886 tg_weight += cfs_rq->load.weight; 814 tg_weight += cfs_rq->load.weight;
887 815
888 return tg_weight; 816 return tg_weight;
@@ -906,27 +834,11 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
906 834
907 return shares; 835 return shares;
908} 836}
909
910static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
911{
912 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
913 update_cfs_load(cfs_rq, 0);
914 update_cfs_shares(cfs_rq);
915 }
916}
917# else /* CONFIG_SMP */ 837# else /* CONFIG_SMP */
918static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
919{
920}
921
922static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) 838static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
923{ 839{
924 return tg->shares; 840 return tg->shares;
925} 841}
926
927static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
928{
929}
930# endif /* CONFIG_SMP */ 842# endif /* CONFIG_SMP */
931static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 843static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
932 unsigned long weight) 844 unsigned long weight)
@@ -944,6 +856,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
944 account_entity_enqueue(cfs_rq, se); 856 account_entity_enqueue(cfs_rq, se);
945} 857}
946 858
859static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
860
947static void update_cfs_shares(struct cfs_rq *cfs_rq) 861static void update_cfs_shares(struct cfs_rq *cfs_rq)
948{ 862{
949 struct task_group *tg; 863 struct task_group *tg;
@@ -963,17 +877,9 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
963 reweight_entity(cfs_rq_of(se), se, shares); 877 reweight_entity(cfs_rq_of(se), se, shares);
964} 878}
965#else /* CONFIG_FAIR_GROUP_SCHED */ 879#else /* CONFIG_FAIR_GROUP_SCHED */
966static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
967{
968}
969
970static inline void update_cfs_shares(struct cfs_rq *cfs_rq) 880static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
971{ 881{
972} 882}
973
974static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
975{
976}
977#endif /* CONFIG_FAIR_GROUP_SCHED */ 883#endif /* CONFIG_FAIR_GROUP_SCHED */
978 884
979#ifdef CONFIG_SMP 885#ifdef CONFIG_SMP
@@ -1490,7 +1396,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1490 * Update run-time statistics of the 'current'. 1396 * Update run-time statistics of the 'current'.
1491 */ 1397 */
1492 update_curr(cfs_rq); 1398 update_curr(cfs_rq);
1493 update_cfs_load(cfs_rq, 0);
1494 enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP); 1399 enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
1495 account_entity_enqueue(cfs_rq, se); 1400 account_entity_enqueue(cfs_rq, se);
1496 update_cfs_shares(cfs_rq); 1401 update_cfs_shares(cfs_rq);
@@ -1587,7 +1492,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1587 if (se != cfs_rq->curr) 1492 if (se != cfs_rq->curr)
1588 __dequeue_entity(cfs_rq, se); 1493 __dequeue_entity(cfs_rq, se);
1589 se->on_rq = 0; 1494 se->on_rq = 0;
1590 update_cfs_load(cfs_rq, 0);
1591 account_entity_dequeue(cfs_rq, se); 1495 account_entity_dequeue(cfs_rq, se);
1592 1496
1593 /* 1497 /*
@@ -1756,11 +1660,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
1756 update_entity_load_avg(curr, 1); 1660 update_entity_load_avg(curr, 1);
1757 update_cfs_rq_blocked_load(cfs_rq, 1); 1661 update_cfs_rq_blocked_load(cfs_rq, 1);
1758 1662
1759 /*
1760 * Update share accounting for long-running entities.
1761 */
1762 update_entity_shares_tick(cfs_rq);
1763
1764#ifdef CONFIG_SCHED_HRTICK 1663#ifdef CONFIG_SCHED_HRTICK
1765 /* 1664 /*
1766 * queued ticks are scheduled to match the slice, so don't bother 1665 * queued ticks are scheduled to match the slice, so don't bother
@@ -2005,18 +1904,9 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
2005 cfs_rq->throttle_count--; 1904 cfs_rq->throttle_count--;
2006#ifdef CONFIG_SMP 1905#ifdef CONFIG_SMP
2007 if (!cfs_rq->throttle_count) { 1906 if (!cfs_rq->throttle_count) {
2008 u64 delta = rq->clock_task - cfs_rq->load_stamp;
2009
2010 /* leaving throttled state, advance shares averaging windows */
2011 cfs_rq->load_stamp += delta;
2012 cfs_rq->load_last += delta;
2013
2014 /* adjust cfs_rq_clock_task() */ 1907 /* adjust cfs_rq_clock_task() */
2015 cfs_rq->throttled_clock_task_time += rq->clock_task - 1908 cfs_rq->throttled_clock_task_time += rq->clock_task -
2016 cfs_rq->throttled_clock_task; 1909 cfs_rq->throttled_clock_task;
2017
2018 /* update entity weight now that we are on_rq again */
2019 update_cfs_shares(cfs_rq);
2020 } 1910 }
2021#endif 1911#endif
2022 1912
@@ -2028,11 +1918,9 @@ static int tg_throttle_down(struct task_group *tg, void *data)
2028 struct rq *rq = data; 1918 struct rq *rq = data;
2029 struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)]; 1919 struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
2030 1920
2031 /* group is entering throttled state, record last load */ 1921 /* group is entering throttled state, stop time */
2032 if (!cfs_rq->throttle_count) { 1922 if (!cfs_rq->throttle_count)
2033 update_cfs_load(cfs_rq, 0);
2034 cfs_rq->throttled_clock_task = rq->clock_task; 1923 cfs_rq->throttled_clock_task = rq->clock_task;
2035 }
2036 cfs_rq->throttle_count++; 1924 cfs_rq->throttle_count++;
2037 1925
2038 return 0; 1926 return 0;
@@ -2630,7 +2518,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
2630 if (cfs_rq_throttled(cfs_rq)) 2518 if (cfs_rq_throttled(cfs_rq))
2631 break; 2519 break;
2632 2520
2633 update_cfs_load(cfs_rq, 0);
2634 update_cfs_shares(cfs_rq); 2521 update_cfs_shares(cfs_rq);
2635 update_entity_load_avg(se, 1); 2522 update_entity_load_avg(se, 1);
2636 } 2523 }
@@ -2692,7 +2579,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
2692 if (cfs_rq_throttled(cfs_rq)) 2579 if (cfs_rq_throttled(cfs_rq))
2693 break; 2580 break;
2694 2581
2695 update_cfs_load(cfs_rq, 0);
2696 update_cfs_shares(cfs_rq); 2582 update_cfs_shares(cfs_rq);
2697 update_entity_load_avg(se, 1); 2583 update_entity_load_avg(se, 1);
2698 } 2584 }
@@ -3755,27 +3641,36 @@ next:
3755 */ 3641 */
3756static int update_shares_cpu(struct task_group *tg, int cpu) 3642static int update_shares_cpu(struct task_group *tg, int cpu)
3757{ 3643{
3644 struct sched_entity *se;
3758 struct cfs_rq *cfs_rq; 3645 struct cfs_rq *cfs_rq;
3759 unsigned long flags; 3646 unsigned long flags;
3760 struct rq *rq; 3647 struct rq *rq;
3761 3648
3762 if (!tg->se[cpu])
3763 return 0;
3764
3765 rq = cpu_rq(cpu); 3649 rq = cpu_rq(cpu);
3650 se = tg->se[cpu];
3766 cfs_rq = tg->cfs_rq[cpu]; 3651 cfs_rq = tg->cfs_rq[cpu];
3767 3652
3768 raw_spin_lock_irqsave(&rq->lock, flags); 3653 raw_spin_lock_irqsave(&rq->lock, flags);
3769 3654
3770 update_rq_clock(rq); 3655 update_rq_clock(rq);
3771 update_cfs_load(cfs_rq, 1);
3772 update_cfs_rq_blocked_load(cfs_rq, 1); 3656 update_cfs_rq_blocked_load(cfs_rq, 1);
3773 3657
3774 /* 3658 if (se) {
3775 * We need to update shares after updating tg->load_weight in 3659 update_entity_load_avg(se, 1);
3776 * order to adjust the weight of groups with long running tasks. 3660 /*
3777 */ 3661 * We pivot on our runnable average having decayed to zero for
3778 update_cfs_shares(cfs_rq); 3662 * list removal. This generally implies that all our children
3663 * have also been removed (modulo rounding error or bandwidth
3664 * control); however, such cases are rare and we can fix these
3665 * at enqueue.
3666 *
3667 * TODO: fix up out-of-order children on enqueue.
3668 */
3669 if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running)
3670 list_del_leaf_cfs_rq(cfs_rq);
3671 } else {
3672 update_rq_runnable_avg(rq, rq->nr_running);
3673 }
3779 3674
3780 raw_spin_unlock_irqrestore(&rq->lock, flags); 3675 raw_spin_unlock_irqrestore(&rq->lock, flags);
3781 3676
@@ -5702,10 +5597,6 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
5702 5597
5703 cfs_rq->tg = tg; 5598 cfs_rq->tg = tg;
5704 cfs_rq->rq = rq; 5599 cfs_rq->rq = rq;
5705#ifdef CONFIG_SMP
5706 /* allow initial update_cfs_load() to truncate */
5707 cfs_rq->load_stamp = 1;
5708#endif
5709 init_cfs_rq_runtime(cfs_rq); 5600 init_cfs_rq_runtime(cfs_rq);
5710 5601
5711 tg->cfs_rq[cpu] = cfs_rq; 5602 tg->cfs_rq[cpu] = cfs_rq;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d13bce7a44ef..0a75a430ca77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -234,11 +234,21 @@ struct cfs_rq {
234 u64 runnable_load_avg, blocked_load_avg; 234 u64 runnable_load_avg, blocked_load_avg;
235 atomic64_t decay_counter, removed_load; 235 atomic64_t decay_counter, removed_load;
236 u64 last_decay; 236 u64 last_decay;
237
237#ifdef CONFIG_FAIR_GROUP_SCHED 238#ifdef CONFIG_FAIR_GROUP_SCHED
238 u32 tg_runnable_contrib; 239 u32 tg_runnable_contrib;
239 u64 tg_load_contrib; 240 u64 tg_load_contrib;
240#endif 241#endif /* CONFIG_FAIR_GROUP_SCHED */
241#endif 242
243 /*
244 * h_load = weight * f(tg)
245 *
246 * Where f(tg) is the recursive weight fraction assigned to
247 * this group.
248 */
249 unsigned long h_load;
250#endif /* CONFIG_SMP */
251
242#ifdef CONFIG_FAIR_GROUP_SCHED 252#ifdef CONFIG_FAIR_GROUP_SCHED
243 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ 253 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
244 254
@@ -254,28 +264,6 @@ struct cfs_rq {
254 struct list_head leaf_cfs_rq_list; 264 struct list_head leaf_cfs_rq_list;
255 struct task_group *tg; /* group that "owns" this runqueue */ 265 struct task_group *tg; /* group that "owns" this runqueue */
256 266
257#ifdef CONFIG_SMP
258 /*
259 * h_load = weight * f(tg)
260 *
261 * Where f(tg) is the recursive weight fraction assigned to
262 * this group.
263 */
264 unsigned long h_load;
265
266 /*
267 * Maintaining per-cpu shares distribution for group scheduling
268 *
269 * load_stamp is the last time we updated the load average
270 * load_last is the last time we updated the load average and saw load
271 * load_unacc_exec_time is currently unaccounted execution time
272 */
273 u64 load_avg;
274 u64 load_period;
275 u64 load_stamp, load_last, load_unacc_exec_time;
276
277 unsigned long load_contribution;
278#endif /* CONFIG_SMP */
279#ifdef CONFIG_CFS_BANDWIDTH 267#ifdef CONFIG_CFS_BANDWIDTH
280 int runtime_enabled; 268 int runtime_enabled;
281 u64 runtime_expires; 269 u64 runtime_expires;