Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c  217
 1 file changed, 121 insertions, 96 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 927c9307cd00..669c49aa57f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
 
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -1087,7 +1092,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_DONE;
 }
 
-static void init_hrtick(void)
+static __init void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
@@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-
-	if (rq->nr_running)
-		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
-
-	return rq->avg_load_per_task;
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
  * Iterate the full tree, calling @down when first entering a node and @up when
  * leaving it for the final time.
  */
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
 {
 	struct task_group *parent, *child;
+	int ret;
 
 	rcu_read_lock();
 	parent = &root_task_group;
 down:
-	(*down)(parent, cpu, sd);
+	ret = (*down)(parent, data);
+	if (ret)
+		goto out_unlock;
 	list_for_each_entry_rcu(child, &parent->children, siblings) {
 		parent = child;
 		goto down;
@@ -1419,15 +1410,43 @@ down:
 up:
 		continue;
 	}
-	(*up)(parent, cpu, sd);
+	ret = (*up)(parent, data);
+	if (ret)
+		goto out_unlock;
 
 	child = parent;
 	parent = parent->parent;
 	if (parent)
 		goto up;
+out_unlock:
 	rcu_read_unlock();
+
+	return ret;
 }
 
+static int tg_nop(struct task_group *tg, void *data)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->nr_running)
+		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+	return rq->avg_load_per_task;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
  * This needs to be done in a bottom-up fashion because the rq weight of a
  * parent group depends on the shares of its child groups.
  */
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
 {
 	unsigned long rq_weight = 0;
 	unsigned long shares = 0;
+	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
 		__update_group_shares_cpu(tg, i, shares, rq_weight);
 		spin_unlock_irqrestore(&rq->lock, flags);
 	}
+
+	return 0;
 }
 
 /*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
 {
 	unsigned long load;
+	long cpu = (long)data;
 
 	if (!tg->parent) {
 		load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
 	}
 
 	tg->cfs_rq[cpu]->h_load = load;
-}
 
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+	return 0;
 }
 
 static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
 		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+		walk_tg_tree(tg_nop, tg_shares_up, sd);
 	}
 }
 
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 	spin_lock(&rq->lock);
 }
 
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
 {
-	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
 #else
@@ -5171,7 +5189,8 @@ recheck:
 	 * Do not allow realtime tasks into groups that have no runtime
 	 * assigned.
 	 */
-	if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+	if (rt_bandwidth_enabled() && rt_policy(policy) &&
+			task_group(p)->rt_bandwidth.rt_runtime == 0)
 		return -EPERM;
 #endif
 
@@ -8808,73 +8827,77 @@ static DEFINE_MUTEX(rt_constraints_mutex);
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 16;
+		return 1ULL << 20;
 
-	return div64_u64(runtime << 16, period);
+	return div64_u64(runtime << 20, period);
 }
 
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_group *tgi, *parent = tg->parent;
-	unsigned long total = 0;
+	struct task_struct *g, *p;
 
-	if (!parent) {
-		if (global_rt_period() < period)
-			return 0;
+	do_each_thread(g, p) {
+		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+			return 1;
+	} while_each_thread(g, p);
 
-		return to_ratio(period, runtime) <
-			to_ratio(global_rt_period(), global_rt_runtime());
-	}
+	return 0;
+}
 
-	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-		return 0;
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-		if (tgi == tg)
-			continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) <=
-		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-				parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi;
-	unsigned long total = 0;
-	unsigned long global_ratio =
-		to_ratio(global_rt_period(), global_rt_runtime());
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list) {
-		if (tgi == tg)
-			continue;
+	total = to_ratio(period, runtime);
+
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) < global_ratio;
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
 }
-#endif
 
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
-	struct task_struct *g, *p;
-	do_each_thread(g, p) {
-		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-			return 1;
-	} while_each_thread(g, p);
-	return 0;
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
 static int tg_set_bandwidth(struct task_group *tg,
@@ -8884,14 +8907,9 @@ static int tg_set_bandwidth(struct task_group *tg,
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-		err = -EBUSY;
-		goto unlock;
-	}
-	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
+		goto unlock;
 
 	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8964,12 +8982,16 @@ static int sched_rt_global_constraints(void)
 	u64 rt_runtime, rt_period;
 	int ret = 0;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
 	mutex_lock(&rt_constraints_mutex);
-	if (!__rt_schedulable(tg, rt_period, rt_runtime))
-		ret = -EINVAL;
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(tg, rt_period, rt_runtime);
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;
@@ -8980,6 +9002,9 @@ static int sched_rt_global_constraints(void)
 	unsigned long flags;
 	int i;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;