Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c  150
1 file changed, 111 insertions(+), 39 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c2266c43e993..08e937496b24 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,25 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && \
+	     (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+	list_add_rcu(&rt_rq->leaf_rt_rq_list,
+			&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+	list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
 
@@ -199,11 +218,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
-	int this_cpu = smp_processor_id();
 	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se;
 
-	rt_se = rt_rq->tg->rt_se[this_cpu];
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_rq->rt_nr_running) {
 		if (rt_se && !on_rt_rq(rt_se))
@@ -215,10 +235,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 
 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	int this_cpu = smp_processor_id();
 	struct sched_rt_entity *rt_se;
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
 
-	rt_se = rt_rq->tg->rt_se[this_cpu];
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_se && on_rt_rq(rt_se))
 		dequeue_rt_entity(rt_se);
@@ -276,6 +296,19 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
 
@@ -382,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -467,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -475,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -542,12 +577,22 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
 				enqueue = 1;
+
+				/*
+				 * Force a clock update if the CPU was idle,
+				 * lest wakeup -> unthrottle time accumulate.
+				 */
+				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
+					rq->skip_clock_update = -1;
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
-		} else if (rt_rq->rt_nr_running)
+		} else if (rt_rq->rt_nr_running) {
 			idle = 0;
+			if (!rt_rq_throttled(rt_rq))
+				enqueue = 1;
+		}
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
@@ -606,7 +651,7 @@ static void update_curr_rt(struct rq *rq)
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	u64 delta_exec;
 
-	if (!task_has_rt_policy(curr))
+	if (curr->sched_class != &rt_sched_class)
 		return;
 
 	delta_exec = rq->clock_task - curr->se.exec_start;
@@ -825,6 +870,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
+	if (!rt_rq->rt_nr_running)
+		list_add_leaf_rt_rq(rt_rq);
+
 	if (head)
 		list_add(&rt_se->run_list, queue);
 	else
@@ -844,6 +892,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
 	__clear_bit(rt_se_prio(rt_se), array->bitmap);
 
 	dec_rt_tasks(rt_se, rt_rq);
+	if (!rt_rq->rt_nr_running)
+		list_del_leaf_rt_rq(rt_rq);
 }
 
 /*
@@ -949,13 +999,23 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 {
+	struct task_struct *curr;
+	struct rq *rq;
+	int cpu;
+
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
 
+	cpu = task_cpu(p);
+	rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+
 	/*
-	 * If the current task is an RT task, then
+	 * If the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -969,21 +1029,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 	 * lock?
 	 *
 	 * For equal prio tasks, we just let the scheduler sort it out.
+	 *
+	 * Otherwise, just let it ride on the affined RQ and the
+	 * post-schedule router will push the preempted task away
+	 *
+	 * This test is optimistic, if we get it wrong the load-balancer
+	 * will have to sort it out.
 	 */
-	if (unlikely(rt_task(rq->curr)) &&
-	    (rq->curr->rt.nr_cpus_allowed < 2 ||
-	     rq->curr->prio < p->prio) &&
+	if (curr && unlikely(rt_task(curr)) &&
+	    (curr->rt.nr_cpus_allowed < 2 ||
+	     curr->prio < p->prio) &&
 	    (p->rt.nr_cpus_allowed > 1)) {
-		int cpu = find_lowest_rq(p);
+		int target = find_lowest_rq(p);
 
-		return (cpu == -1) ? task_cpu(p) : cpu;
+		if (target != -1)
+			cpu = target;
 	}
+	rcu_read_unlock();
 
-	/*
-	 * Otherwise, just let it ride on the affined RQ and the
-	 * post-schedule router will push the preempted task away
-	 */
-	return task_cpu(p);
+	return cpu;
 }
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
@@ -1108,7 +1172,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1199,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (!cpumask_test_cpu(this_cpu, lowest_mask))
 		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (sd->flags & SD_WAKE_AFFINE) {
 			int best_cpu;
@@ -1208,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task)
 			 * remote processor.
 			 */
 			if (this_cpu != -1 &&
-			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
+				rcu_read_unlock();
 				return this_cpu;
+			}
 
 			best_cpu = cpumask_first_and(lowest_mask,
 						     sched_domain_span(sd));
-			if (best_cpu < nr_cpu_ids)
+			if (best_cpu < nr_cpu_ids) {
+				rcu_read_unlock();
 				return best_cpu;
+			}
 		}
 	}
+	rcu_read_unlock();
 
 	/*
 	 * And finally, if there were no matches within the domains
@@ -1259,7 +1329,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
-				     !task->se.on_rq)) {
+				     !task->on_rq)) {
 
 				raw_spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
@@ -1293,7 +1363,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 	BUG_ON(task_current(rq, p));
 	BUG_ON(p->rt.nr_cpus_allowed <= 1);
 
-	BUG_ON(!p->se.on_rq);
+	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
 
 	return p;
@@ -1350,7 +1420,7 @@ retry:
 	task = pick_next_pushable_task(rq);
 	if (task_cpu(next_task) == rq->cpu && task == next_task) {
 		/*
-		 * If we get here, the task hasnt moved at all, but
+		 * If we get here, the task hasn't moved at all, but
 		 * it has failed to push. We will not try again,
 		 * since the other cpus will pull from us when they
 		 * are ready.
@@ -1439,7 +1509,7 @@ static int pull_rt_task(struct rq *this_rq)
 		 */
 		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
 			WARN_ON(p == src_rq->curr);
-			WARN_ON(!p->se.on_rq);
+			WARN_ON(!p->on_rq);
 
 			/*
 			 * There's a chance that p is higher in priority
@@ -1460,7 +1530,7 @@ static int pull_rt_task(struct rq *this_rq)
 			/*
 			 * We continue with the search, just in
 			 * case there's an even higher prio task
-			 * in another runqueue. (low likelyhood
+			 * in another runqueue. (low likelihood
 			 * but possible)
 			 */
 		}
@@ -1510,7 +1580,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
+	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
 		if (!task_current(rq, p)) {
@@ -1571,8 +1641,7 @@ static void rq_offline_rt(struct rq *rq)
  * When switch from the rt queue, we bring ourselves to a position
  * that we might want to pull RT tasks from other runqueues.
  */
-static void switched_from_rt(struct rq *rq, struct task_struct *p,
-			     int running)
+static void switched_from_rt(struct rq *rq, struct task_struct *p)
 {
 	/*
 	 * If there are other RT tasks then we will reschedule
@@ -1581,7 +1650,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (!rq->rt.rt_nr_running)
+	if (p->on_rq && !rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
 
@@ -1600,8 +1669,7 @@ static inline void init_sched_rt_class(void)
  * with RT tasks. In this case we try to push them off to
  * other runqueues.
  */
-static void switched_to_rt(struct rq *rq, struct task_struct *p,
-			   int running)
+static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
 	int check_resched = 1;
 
@@ -1612,7 +1680,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
 	 * If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (!running) {
+	if (p->on_rq && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (rq->rt.overloaded && push_rt_task(rq) &&
 		    /* Don't resched if we changed runqueues */
@@ -1628,10 +1696,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
  * Priority of the task has changed. This may cause
  * us to initiate a push or pull.
  */
-static void prio_changed_rt(struct rq *rq, struct task_struct *p,
-			    int oldprio, int running)
+static void
+prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 {
-	if (running) {
+	if (!p->on_rq)
+		return;
+
+	if (rq->curr == p) {
 #ifdef CONFIG_SMP
 		/*
 		 * If our priority decreases while running, we
@@ -1767,10 +1838,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }
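
For orientation, here is a minimal userspace sketch of the iterator pattern the hunks above introduce: with group scheduling, for_each_rt_rq() walks the global task_groups list and visits each group's per-CPU rt_rq; without it, it degenerates to visiting the root rt_rq once. Everything below (NR_GROUPS, the trimmed struct definitions, the plain next pointer standing in for the kernel's RCU-protected list) is a simplified assumption for illustration, not the kernel's actual code.

/*
 * Illustrative sketch only -- a small userspace model of for_each_rt_rq(),
 * not kernel code.  Types and names are simplified stand-ins.
 */
#include <stdio.h>

#define NR_CPUS   2
#define NR_GROUPS 3

struct rt_rq { int rt_nr_running; };

struct task_group {
	struct rt_rq *rt_rq[NR_CPUS];	/* one rt_rq per CPU, as in the kernel */
	struct task_group *next;	/* stands in for the task_groups list */
};

static struct rt_rq rt_rqs[NR_GROUPS][NR_CPUS];
static struct task_group groups[NR_GROUPS];
static struct task_group *task_groups;

/*
 * Group-scheduling flavour: visit every task group's rt_rq for one CPU.
 * The !RT_GROUP_SCHED flavour in the second hunk visits only &rq->rt.
 */
#define for_each_rt_rq(rt_rq, iter, cpu)		\
	for (iter = task_groups;			\
	     iter && (rt_rq = iter->rt_rq[cpu]);	\
	     iter = iter->next)

int main(void)
{
	struct task_group *iter;
	struct rt_rq *rt_rq;
	int g, cpu;

	for (g = 0; g < NR_GROUPS; g++) {
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			groups[g].rt_rq[cpu] = &rt_rqs[g][cpu];
		groups[g].next = (g + 1 < NR_GROUPS) ? &groups[g + 1] : NULL;
	}
	task_groups = &groups[0];

	/* Walk every rt_rq of CPU 0, the way __disable_runtime() now does. */
	for_each_rt_rq(rt_rq, iter, 0)
		printf("visiting rt_rq with %d running tasks\n",
		       rt_rq->rt_nr_running);

	return 0;
}

Compiled with a plain C compiler, this prints one line per group's rt_rq on CPU 0. The point it models: the old for_each_leaf_rt_rq() walk only saw rt_rqs that currently have queued tasks, whereas the runtime enable/disable and stats paths patched above appear to need every rt_rq, throttled or empty, which is what for_each_rt_rq() provides.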