Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  193
1 file changed, 95 insertions, 98 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 238a76957e86..72bb9483d949 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -637,7 +637,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 
 #define WMULT_SHIFT 32
 
-static inline unsigned long
+static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                 struct load_weight *lw)
 {
@@ -657,7 +657,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
                 tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
         }
 
-        return (unsigned long)min(tmp, (u64)sysctl_sched_runtime_limit);
+        return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
 
 static inline unsigned long
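The change above replaces the clamp against sysctl_sched_runtime_limit with a cap at LONG_MAX. The surrounding arithmetic is a fixed-point multiply by the precomputed inverse of the load weight, shifted down by WMULT_SHIFT. The following is a minimal userspace sketch of that idea only, not the kernel function: the struct and names are invented for illustration, and it omits the large-value handling that the full kernel function performs (only part of it is visible in this hunk).

#include <limits.h>
#include <stdint.h>

#define WMULT_SHIFT 32

/* Invented stand-in for the kernel's struct load_weight; inv_weight is
 * assumed to hold 2^32 / weight, precomputed when the weight is set. */
struct lw_sketch {
        unsigned long weight;
        uint32_t inv_weight;
};

/* Scale delta_exec by weight / lw->weight via the fixed-point inverse,
 * then clamp the result to LONG_MAX as the patched return statement does. */
static unsigned long calc_delta_sketch(unsigned long delta_exec,
                                       unsigned long weight,
                                       const struct lw_sketch *lw)
{
        uint64_t tmp = (uint64_t)delta_exec * weight;

        tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
        return tmp < (uint64_t)LONG_MAX ? (unsigned long)tmp : LONG_MAX;
}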
@@ -678,46 +678,6 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
         lw->inv_weight = 0;
 }
 
-static void __update_curr_load(struct rq *rq, struct load_stat *ls)
-{
-        if (rq->curr != rq->idle && ls->load.weight) {
-                ls->delta_exec += ls->delta_stat;
-                ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
-                ls->delta_stat = 0;
-        }
-}
-
-/*
- * Update delta_exec, delta_fair fields for rq.
- *
- * delta_fair clock advances at a rate inversely proportional to
- * total load (rq->ls.load.weight) on the runqueue, while
- * delta_exec advances at the same rate as wall-clock (provided
- * cpu is not idle).
- *
- * delta_exec / delta_fair is a measure of the (smoothened) load on this
- * runqueue over any given interval. This (smoothened) load is used
- * during load balance.
- *
- * This function is called /before/ updating rq->ls.load
- * and when switching tasks.
- */
-static void update_curr_load(struct rq *rq, u64 now)
-{
-        struct load_stat *ls = &rq->ls;
-        u64 start;
-
-        start = ls->load_update_start;
-        ls->load_update_start = now;
-        ls->delta_stat += now - start;
-        /*
-         * Stagger updates to ls->delta_fair. Very frequent updates
-         * can be expensive.
-         */
-        if (ls->delta_stat >= sysctl_sched_stat_granularity)
-                __update_curr_load(rq, ls);
-}
-
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -727,19 +687,6 @@ static void update_curr_load(struct rq *rq, u64 now)
  * slice expiry etc.
  */
 
-/*
- * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE
- * If static_prio_timeslice() is ever changed to break this assumption then
- * this code will need modification
- */
-#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
-#define load_weight(lp) \
-        (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
-#define PRIO_TO_LOAD_WEIGHT(prio) \
-        load_weight(static_prio_timeslice(prio))
-#define RTPRIO_TO_LOAD_WEIGHT(rp) \
-        (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + load_weight(rp))
-
 #define WEIGHT_IDLEPRIO 2
 #define WMULT_IDLEPRIO (1 << 31)
 
@@ -781,32 +728,6 @@ static const u32 prio_to_wmult[40] = {
 /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
-{
-        update_curr_load(rq, now);
-        update_load_add(&rq->ls.load, p->se.load.weight);
-}
-
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
-{
-        update_curr_load(rq, now);
-        update_load_sub(&rq->ls.load, p->se.load.weight);
-}
-
-static inline void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
-{
-        rq->nr_running++;
-        inc_load(rq, p, now);
-}
-
-static inline void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
-{
-        rq->nr_running--;
-        dec_load(rq, p, now);
-}
-
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
 
 /*
@@ -837,6 +758,72 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 #define sched_class_highest (&rt_sched_class)
 
+static void __update_curr_load(struct rq *rq, struct load_stat *ls)
+{
+        if (rq->curr != rq->idle && ls->load.weight) {
+                ls->delta_exec += ls->delta_stat;
+                ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
+                ls->delta_stat = 0;
+        }
+}
+
+/*
+ * Update delta_exec, delta_fair fields for rq.
+ *
+ * delta_fair clock advances at a rate inversely proportional to
+ * total load (rq->ls.load.weight) on the runqueue, while
+ * delta_exec advances at the same rate as wall-clock (provided
+ * cpu is not idle).
+ *
+ * delta_exec / delta_fair is a measure of the (smoothened) load on this
+ * runqueue over any given interval. This (smoothened) load is used
+ * during load balance.
+ *
+ * This function is called /before/ updating rq->ls.load
+ * and when switching tasks.
+ */
+static void update_curr_load(struct rq *rq, u64 now)
+{
+        struct load_stat *ls = &rq->ls;
+        u64 start;
+
+        start = ls->load_update_start;
+        ls->load_update_start = now;
+        ls->delta_stat += now - start;
+        /*
+         * Stagger updates to ls->delta_fair. Very frequent updates
+         * can be expensive.
+         */
+        if (ls->delta_stat >= sysctl_sched_stat_granularity)
+                __update_curr_load(rq, ls);
+}
+
+static inline void
+inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+{
+        update_curr_load(rq, now);
+        update_load_add(&rq->ls.load, p->se.load.weight);
+}
+
+static inline void
+dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+{
+        update_curr_load(rq, now);
+        update_load_sub(&rq->ls.load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+{
+        rq->nr_running++;
+        inc_load(rq, p, now);
+}
+
+static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+{
+        rq->nr_running--;
+        dec_load(rq, p, now);
+}
+
 static void set_load_weight(struct task_struct *p)
 {
         task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
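The comment block reintroduced above states that the delta_fair clock advances at a rate inversely proportional to the total runqueue weight, while delta_exec tracks wall-clock time, so delta_exec / delta_fair acts as a smoothed load measure. A short standalone illustration of that rate relationship, assuming calc_delta_fair() scales the wall-clock delta by a nice-0 weight of 1024 (that value is an assumption for the example, not something this hunk shows):

#include <stdio.h>

int main(void)
{
        const unsigned long nice0_weight = 1024;   /* assumed nice-0 load scale */
        const unsigned long delta_stat = 4000;     /* wall-clock ns spent running */

        /* One nice-0 task runnable: the fair clock tracks the wall clock. */
        unsigned long delta_fair_1 = delta_stat * nice0_weight / (1 * nice0_weight);

        /* Two nice-0 tasks runnable: the fair clock advances at half the rate,
         * so delta_exec / delta_fair doubles, reflecting the doubled load. */
        unsigned long delta_fair_2 = delta_stat * nice0_weight / (2 * nice0_weight);

        printf("delta_fair: %lu with one task, %lu with two\n",
               delta_fair_1, delta_fair_2);        /* prints 4000 and 2000 */
        return 0;
}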
@@ -996,18 +983,21 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         u64 clock_offset, fair_clock_offset;
 
         clock_offset = old_rq->clock - new_rq->clock;
-        fair_clock_offset = old_rq->cfs.fair_clock -
-                                new_rq->cfs.fair_clock;
-        if (p->se.wait_start)
-                p->se.wait_start -= clock_offset;
+        fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock;
+
         if (p->se.wait_start_fair)
                 p->se.wait_start_fair -= fair_clock_offset;
+        if (p->se.sleep_start_fair)
+                p->se.sleep_start_fair -= fair_clock_offset;
+
+#ifdef CONFIG_SCHEDSTATS
+        if (p->se.wait_start)
+                p->se.wait_start -= clock_offset;
         if (p->se.sleep_start)
                 p->se.sleep_start -= clock_offset;
         if (p->se.block_start)
                 p->se.block_start -= clock_offset;
-        if (p->se.sleep_start_fair)
-                p->se.sleep_start_fair -= fair_clock_offset;
+#endif
 
         __set_task_cpu(p, new_cpu);
 }
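set_task_cpu() above rebases a migrating task's timestamps onto the destination runqueue's clock, and the patch moves the schedstats-only stamps behind CONFIG_SCHEDSTATS. A tiny standalone example of why subtracting clock_offset preserves the measured interval; all numbers are invented for illustration:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t old_rq_clock = 5000;   /* clock of the source runqueue */
        uint64_t new_rq_clock = 3000;   /* clock of the destination runqueue */
        uint64_t clock_offset = old_rq_clock - new_rq_clock;

        /* A wait_start stamp taken 800 ticks ago on the old runqueue's clock. */
        uint64_t wait_start = old_rq_clock - 800;

        /* After the adjustment, the destination clock sees the same 800 ticks. */
        wait_start -= clock_offset;
        assert(new_rq_clock - wait_start == 800);
        return 0;
}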
@@ -1568,17 +1558,19 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 static void __sched_fork(struct task_struct *p)
 {
         p->se.wait_start_fair = 0;
-        p->se.wait_start = 0;
         p->se.exec_start = 0;
         p->se.sum_exec_runtime = 0;
         p->se.delta_exec = 0;
         p->se.delta_fair_run = 0;
         p->se.delta_fair_sleep = 0;
         p->se.wait_runtime = 0;
+        p->se.sleep_start_fair = 0;
+
+#ifdef CONFIG_SCHEDSTATS
+        p->se.wait_start = 0;
         p->se.sum_wait_runtime = 0;
         p->se.sum_sleep_runtime = 0;
         p->se.sleep_start = 0;
-        p->se.sleep_start_fair = 0;
         p->se.block_start = 0;
         p->se.sleep_max = 0;
         p->se.block_max = 0;
@@ -1586,6 +1578,7 @@ static void __sched_fork(struct task_struct *p)
         p->se.wait_max = 0;
         p->se.wait_runtime_overruns = 0;
         p->se.wait_runtime_underruns = 0;
+#endif
 
         INIT_LIST_HEAD(&p->run_list);
         p->se.on_rq = 0;
@@ -1654,22 +1647,27 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
         unsigned long flags;
         struct rq *rq;
         int this_cpu;
+        u64 now;
 
         rq = task_rq_lock(p, &flags);
         BUG_ON(p->state != TASK_RUNNING);
         this_cpu = smp_processor_id(); /* parent's CPU */
+        now = rq_clock(rq);
 
         p->prio = effective_prio(p);
 
-        if (!sysctl_sched_child_runs_first || (clone_flags & CLONE_VM) ||
-                        task_cpu(p) != this_cpu || !current->se.on_rq) {
+        if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
+                        (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
+                        !current->se.on_rq) {
+
                 activate_task(rq, p, 0);
         } else {
                 /*
                  * Let the scheduling class do new task startup
                  * management (if any):
                  */
-                p->sched_class->task_new(rq, p);
+                p->sched_class->task_new(rq, p, now);
+                inc_nr_running(p, rq, now);
         }
         check_preempt_curr(rq, p);
         task_rq_unlock(rq, &flags);
@@ -2908,8 +2906,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
         schedstat_inc(sd, alb_cnt);
 
         if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-                        RTPRIO_TO_LOAD_WEIGHT(100), sd, CPU_IDLE,
-                        NULL))
+                        ULONG_MAX, sd, CPU_IDLE, NULL))
                 schedstat_inc(sd, alb_pushed);
         else
                 schedstat_inc(sd, alb_failed);
@@ -5269,8 +5266,6 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
                 sizeof(int), 0644, proc_dointvec_minmax);
         set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
                 sizeof(int), 0644, proc_dointvec_minmax);
-        set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time,
-                sizeof(long long), 0644, proc_doulongvec_minmax);
         set_table_entry(&table[10], 11, "cache_nice_tries",
                 &sd->cache_nice_tries,
                 sizeof(int), 0644, proc_dointvec_minmax);
@@ -6590,12 +6585,14 @@ void normalize_rt_tasks(void)
         do_each_thread(g, p) {
                 p->se.fair_key = 0;
                 p->se.wait_runtime = 0;
+                p->se.exec_start = 0;
                 p->se.wait_start_fair = 0;
+                p->se.sleep_start_fair = 0;
+#ifdef CONFIG_SCHEDSTATS
                 p->se.wait_start = 0;
-                p->se.exec_start = 0;
                 p->se.sleep_start = 0;
-                p->se.sleep_start_fair = 0;
                 p->se.block_start = 0;
+#endif
                 task_rq(p)->cfs.fair_clock = 0;
                 task_rq(p)->clock = 0;
 