diff options
Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 748 | 
1 files changed, 579 insertions, 169 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index ccacdbdecf45..d87c6e5d4e8c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c  | |||
| @@ -196,10 +196,28 @@ static inline int rt_bandwidth_enabled(void) | |||
| 196 | return sysctl_sched_rt_runtime >= 0; | 196 | return sysctl_sched_rt_runtime >= 0; | 
| 197 | } | 197 | } | 
| 198 | 198 | ||
| 199 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | 199 | static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) | 
| 200 | { | 200 | { | 
| 201 | ktime_t now; | 201 | unsigned long delta; | 
| 202 | ktime_t soft, hard, now; | ||
| 203 | |||
| 204 | for (;;) { | ||
| 205 | if (hrtimer_active(period_timer)) | ||
| 206 | break; | ||
| 207 | |||
| 208 | now = hrtimer_cb_get_time(period_timer); | ||
| 209 | hrtimer_forward(period_timer, now, period); | ||
| 202 | 210 | ||
| 211 | soft = hrtimer_get_softexpires(period_timer); | ||
| 212 | hard = hrtimer_get_expires(period_timer); | ||
| 213 | delta = ktime_to_ns(ktime_sub(hard, soft)); | ||
| 214 | __hrtimer_start_range_ns(period_timer, soft, delta, | ||
| 215 | HRTIMER_MODE_ABS_PINNED, 0); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
| 220 | { | ||
| 203 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | 221 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | 
| 204 | return; | 222 | return; | 
| 205 | 223 | ||
| @@ -207,22 +225,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
| 207 | return; | 225 | return; | 
| 208 | 226 | ||
| 209 | raw_spin_lock(&rt_b->rt_runtime_lock); | 227 | raw_spin_lock(&rt_b->rt_runtime_lock); | 
| 210 | for (;;) { | 228 | start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); | 
| 211 | unsigned long delta; | ||
| 212 | ktime_t soft, hard; | ||
| 213 | |||
| 214 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
| 215 | break; | ||
| 216 | |||
| 217 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | ||
| 218 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | ||
| 219 | |||
| 220 | soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); | ||
| 221 | hard = hrtimer_get_expires(&rt_b->rt_period_timer); | ||
| 222 | delta = ktime_to_ns(ktime_sub(hard, soft)); | ||
| 223 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | ||
| 224 | HRTIMER_MODE_ABS_PINNED, 0); | ||
| 225 | } | ||
| 226 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 229 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 
| 227 | } | 230 | } | 
| 228 | 231 | ||
| @@ -247,6 +250,24 @@ struct cfs_rq; | |||
| 247 | 250 | ||
| 248 | static LIST_HEAD(task_groups); | 251 | static LIST_HEAD(task_groups); | 
| 249 | 252 | ||
| 253 | struct cfs_bandwidth { | ||
| 254 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 255 | raw_spinlock_t lock; | ||
| 256 | ktime_t period; | ||
| 257 | u64 quota, runtime; | ||
| 258 | s64 hierarchal_quota; | ||
| 259 | u64 runtime_expires; | ||
| 260 | |||
| 261 | int idle, timer_active; | ||
| 262 | struct hrtimer period_timer, slack_timer; | ||
| 263 | struct list_head throttled_cfs_rq; | ||
| 264 | |||
| 265 | /* statistics */ | ||
| 266 | int nr_periods, nr_throttled; | ||
| 267 | u64 throttled_time; | ||
| 268 | #endif | ||
| 269 | }; | ||
| 270 | |||
| 250 | /* task group related information */ | 271 | /* task group related information */ | 
| 251 | struct task_group { | 272 | struct task_group { | 
| 252 | struct cgroup_subsys_state css; | 273 | struct cgroup_subsys_state css; | 
| @@ -278,6 +299,8 @@ struct task_group { | |||
| 278 | #ifdef CONFIG_SCHED_AUTOGROUP | 299 | #ifdef CONFIG_SCHED_AUTOGROUP | 
| 279 | struct autogroup *autogroup; | 300 | struct autogroup *autogroup; | 
| 280 | #endif | 301 | #endif | 
| 302 | |||
| 303 | struct cfs_bandwidth cfs_bandwidth; | ||
| 281 | }; | 304 | }; | 
| 282 | 305 | ||
| 283 | /* task_group_lock serializes the addition/removal of task groups */ | 306 | /* task_group_lock serializes the addition/removal of task groups */ | 
| @@ -311,7 +334,7 @@ struct task_group root_task_group; | |||
| 311 | /* CFS-related fields in a runqueue */ | 334 | /* CFS-related fields in a runqueue */ | 
| 312 | struct cfs_rq { | 335 | struct cfs_rq { | 
| 313 | struct load_weight load; | 336 | struct load_weight load; | 
| 314 | unsigned long nr_running; | 337 | unsigned long nr_running, h_nr_running; | 
| 315 | 338 | ||
| 316 | u64 exec_clock; | 339 | u64 exec_clock; | 
| 317 | u64 min_vruntime; | 340 | u64 min_vruntime; | 
| @@ -377,9 +400,120 @@ struct cfs_rq { | |||
| 377 | 400 | ||
| 378 | unsigned long load_contribution; | 401 | unsigned long load_contribution; | 
| 379 | #endif | 402 | #endif | 
| 403 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 404 | int runtime_enabled; | ||
| 405 | u64 runtime_expires; | ||
| 406 | s64 runtime_remaining; | ||
| 407 | |||
| 408 | u64 throttled_timestamp; | ||
| 409 | int throttled, throttle_count; | ||
| 410 | struct list_head throttled_list; | ||
| 411 | #endif | ||
| 380 | #endif | 412 | #endif | 
| 381 | }; | 413 | }; | 
| 382 | 414 | ||
| 415 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 416 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 417 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
| 418 | { | ||
| 419 | return &tg->cfs_bandwidth; | ||
| 420 | } | ||
| 421 | |||
| 422 | static inline u64 default_cfs_period(void); | ||
| 423 | static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); | ||
| 424 | static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); | ||
| 425 | |||
| 426 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | ||
| 427 | { | ||
| 428 | struct cfs_bandwidth *cfs_b = | ||
| 429 | container_of(timer, struct cfs_bandwidth, slack_timer); | ||
| 430 | do_sched_cfs_slack_timer(cfs_b); | ||
| 431 | |||
| 432 | return HRTIMER_NORESTART; | ||
| 433 | } | ||
| 434 | |||
| 435 | static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | ||
| 436 | { | ||
| 437 | struct cfs_bandwidth *cfs_b = | ||
| 438 | container_of(timer, struct cfs_bandwidth, period_timer); | ||
| 439 | ktime_t now; | ||
| 440 | int overrun; | ||
| 441 | int idle = 0; | ||
| 442 | |||
| 443 | for (;;) { | ||
| 444 | now = hrtimer_cb_get_time(timer); | ||
| 445 | overrun = hrtimer_forward(timer, now, cfs_b->period); | ||
| 446 | |||
| 447 | if (!overrun) | ||
| 448 | break; | ||
| 449 | |||
| 450 | idle = do_sched_cfs_period_timer(cfs_b, overrun); | ||
| 451 | } | ||
| 452 | |||
| 453 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
| 454 | } | ||
| 455 | |||
| 456 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
| 457 | { | ||
| 458 | raw_spin_lock_init(&cfs_b->lock); | ||
| 459 | cfs_b->runtime = 0; | ||
| 460 | cfs_b->quota = RUNTIME_INF; | ||
| 461 | cfs_b->period = ns_to_ktime(default_cfs_period()); | ||
| 462 | |||
| 463 | INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); | ||
| 464 | hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
| 465 | cfs_b->period_timer.function = sched_cfs_period_timer; | ||
| 466 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
| 467 | cfs_b->slack_timer.function = sched_cfs_slack_timer; | ||
| 468 | } | ||
| 469 | |||
| 470 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
| 471 | { | ||
| 472 | cfs_rq->runtime_enabled = 0; | ||
| 473 | INIT_LIST_HEAD(&cfs_rq->throttled_list); | ||
| 474 | } | ||
| 475 | |||
| 476 | /* requires cfs_b->lock, may release to reprogram timer */ | ||
| 477 | static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
| 478 | { | ||
| 479 | /* | ||
| 480 | * The timer may be active because we're trying to set a new bandwidth | ||
| 481 | * period or because we're racing with the tear-down path | ||
| 482 | * (timer_active==0 becomes visible before the hrtimer call-back | ||
| 483 | * terminates). In either case we ensure that it's re-programmed | ||
| 484 | */ | ||
| 485 | while (unlikely(hrtimer_active(&cfs_b->period_timer))) { | ||
| 486 | raw_spin_unlock(&cfs_b->lock); | ||
| 487 | /* ensure cfs_b->lock is available while we wait */ | ||
| 488 | hrtimer_cancel(&cfs_b->period_timer); | ||
| 489 | |||
| 490 | raw_spin_lock(&cfs_b->lock); | ||
| 491 | /* if someone else restarted the timer then we're done */ | ||
| 492 | if (cfs_b->timer_active) | ||
| 493 | return; | ||
| 494 | } | ||
| 495 | |||
| 496 | cfs_b->timer_active = 1; | ||
| 497 | start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); | ||
| 498 | } | ||
| 499 | |||
| 500 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
| 501 | { | ||
| 502 | hrtimer_cancel(&cfs_b->period_timer); | ||
| 503 | hrtimer_cancel(&cfs_b->slack_timer); | ||
| 504 | } | ||
| 505 | #else | ||
| 506 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | ||
| 507 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
| 508 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
| 509 | |||
| 510 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
| 511 | { | ||
| 512 | return NULL; | ||
| 513 | } | ||
| 514 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
| 515 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
| 516 | |||
| 383 | /* Real-Time classes' related field in a runqueue: */ | 517 | /* Real-Time classes' related field in a runqueue: */ | 
| 384 | struct rt_rq { | 518 | struct rt_rq { | 
| 385 | struct rt_prio_array active; | 519 | struct rt_prio_array active; | 
| @@ -510,7 +644,7 @@ struct rq { | |||
| 510 | 644 | ||
| 511 | unsigned long cpu_power; | 645 | unsigned long cpu_power; | 
| 512 | 646 | ||
| 513 | unsigned char idle_at_tick; | 647 | unsigned char idle_balance; | 
| 514 | /* For active balancing */ | 648 | /* For active balancing */ | 
| 515 | int post_schedule; | 649 | int post_schedule; | 
| 516 | int active_balance; | 650 | int active_balance; | 
| @@ -520,8 +654,6 @@ struct rq { | |||
| 520 | int cpu; | 654 | int cpu; | 
| 521 | int online; | 655 | int online; | 
| 522 | 656 | ||
| 523 | unsigned long avg_load_per_task; | ||
| 524 | |||
| 525 | u64 rt_avg; | 657 | u64 rt_avg; | 
| 526 | u64 age_stamp; | 658 | u64 age_stamp; | 
| 527 | u64 idle_stamp; | 659 | u64 idle_stamp; | 
| @@ -570,7 +702,7 @@ struct rq { | |||
| 570 | #endif | 702 | #endif | 
| 571 | 703 | ||
| 572 | #ifdef CONFIG_SMP | 704 | #ifdef CONFIG_SMP | 
| 573 | struct task_struct *wake_list; | 705 | struct llist_head wake_list; | 
| 574 | #endif | 706 | #endif | 
| 575 | }; | 707 | }; | 
| 576 | 708 | ||
| @@ -1272,6 +1404,18 @@ void wake_up_idle_cpu(int cpu) | |||
| 1272 | smp_send_reschedule(cpu); | 1404 | smp_send_reschedule(cpu); | 
| 1273 | } | 1405 | } | 
| 1274 | 1406 | ||
| 1407 | static inline bool got_nohz_idle_kick(void) | ||
| 1408 | { | ||
| 1409 | return idle_cpu(smp_processor_id()) && this_rq()->nohz_balance_kick; | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | #else /* CONFIG_NO_HZ */ | ||
| 1413 | |||
| 1414 | static inline bool got_nohz_idle_kick(void) | ||
| 1415 | { | ||
| 1416 | return false; | ||
| 1417 | } | ||
| 1418 | |||
| 1275 | #endif /* CONFIG_NO_HZ */ | 1419 | #endif /* CONFIG_NO_HZ */ | 
| 1276 | 1420 | ||
| 1277 | static u64 sched_avg_period(void) | 1421 | static u64 sched_avg_period(void) | 
| @@ -1471,24 +1615,28 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) | |||
| 1471 | update_load_sub(&rq->load, load); | 1615 | update_load_sub(&rq->load, load); | 
| 1472 | } | 1616 | } | 
| 1473 | 1617 | ||
| 1474 | #if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) | 1618 | #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ | 
| 1619 | (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) | ||
| 1475 | typedef int (*tg_visitor)(struct task_group *, void *); | 1620 | typedef int (*tg_visitor)(struct task_group *, void *); | 
| 1476 | 1621 | ||
| 1477 | /* | 1622 | /* | 
| 1478 | * Iterate the full tree, calling @down when first entering a node and @up when | 1623 | * Iterate task_group tree rooted at *from, calling @down when first entering a | 
| 1479 | * leaving it for the final time. | 1624 | * node and @up when leaving it for the final time. | 
| 1625 | * | ||
| 1626 | * Caller must hold rcu_lock or sufficient equivalent. | ||
| 1480 | */ | 1627 | */ | 
| 1481 | static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | 1628 | static int walk_tg_tree_from(struct task_group *from, | 
| 1629 | tg_visitor down, tg_visitor up, void *data) | ||
| 1482 | { | 1630 | { | 
| 1483 | struct task_group *parent, *child; | 1631 | struct task_group *parent, *child; | 
| 1484 | int ret; | 1632 | int ret; | 
| 1485 | 1633 | ||
| 1486 | rcu_read_lock(); | 1634 | parent = from; | 
| 1487 | parent = &root_task_group; | 1635 | |
| 1488 | down: | 1636 | down: | 
| 1489 | ret = (*down)(parent, data); | 1637 | ret = (*down)(parent, data); | 
| 1490 | if (ret) | 1638 | if (ret) | 
| 1491 | goto out_unlock; | 1639 | goto out; | 
| 1492 | list_for_each_entry_rcu(child, &parent->children, siblings) { | 1640 | list_for_each_entry_rcu(child, &parent->children, siblings) { | 
| 1493 | parent = child; | 1641 | parent = child; | 
| 1494 | goto down; | 1642 | goto down; | 
| @@ -1497,19 +1645,29 @@ up: | |||
| 1497 | continue; | 1645 | continue; | 
| 1498 | } | 1646 | } | 
| 1499 | ret = (*up)(parent, data); | 1647 | ret = (*up)(parent, data); | 
| 1500 | if (ret) | 1648 | if (ret || parent == from) | 
| 1501 | goto out_unlock; | 1649 | goto out; | 
| 1502 | 1650 | ||
| 1503 | child = parent; | 1651 | child = parent; | 
| 1504 | parent = parent->parent; | 1652 | parent = parent->parent; | 
| 1505 | if (parent) | 1653 | if (parent) | 
| 1506 | goto up; | 1654 | goto up; | 
| 1507 | out_unlock: | 1655 | out: | 
| 1508 | rcu_read_unlock(); | ||
| 1509 | |||
| 1510 | return ret; | 1656 | return ret; | 
| 1511 | } | 1657 | } | 
| 1512 | 1658 | ||
| 1659 | /* | ||
| 1660 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
| 1661 | * leaving it for the final time. | ||
| 1662 | * | ||
| 1663 | * Caller must hold rcu_lock or sufficient equivalent. | ||
| 1664 | */ | ||
| 1665 | |||
| 1666 | static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | ||
| 1667 | { | ||
| 1668 | return walk_tg_tree_from(&root_task_group, down, up, data); | ||
| 1669 | } | ||
| 1670 | |||
| 1513 | static int tg_nop(struct task_group *tg, void *data) | 1671 | static int tg_nop(struct task_group *tg, void *data) | 
| 1514 | { | 1672 | { | 
| 1515 | return 0; | 1673 | return 0; | 
| @@ -1569,11 +1727,9 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
| 1569 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | 1727 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | 
| 1570 | 1728 | ||
| 1571 | if (nr_running) | 1729 | if (nr_running) | 
| 1572 | rq->avg_load_per_task = rq->load.weight / nr_running; | 1730 | return rq->load.weight / nr_running; | 
| 1573 | else | ||
| 1574 | rq->avg_load_per_task = 0; | ||
| 1575 | 1731 | ||
| 1576 | return rq->avg_load_per_task; | 1732 | return 0; | 
| 1577 | } | 1733 | } | 
| 1578 | 1734 | ||
| 1579 | #ifdef CONFIG_PREEMPT | 1735 | #ifdef CONFIG_PREEMPT | 
| @@ -1739,7 +1895,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 1739 | #ifdef CONFIG_SMP | 1895 | #ifdef CONFIG_SMP | 
| 1740 | /* | 1896 | /* | 
| 1741 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | 1897 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | 
| 1742 | * successfuly executed on another CPU. We must ensure that updates of | 1898 | * successfully executed on another CPU. We must ensure that updates of | 
| 1743 | * per-task data have been completed by this moment. | 1899 | * per-task data have been completed by this moment. | 
| 1744 | */ | 1900 | */ | 
| 1745 | smp_wmb(); | 1901 | smp_wmb(); | 
| @@ -1806,7 +1962,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1806 | rq->nr_uninterruptible--; | 1962 | rq->nr_uninterruptible--; | 
| 1807 | 1963 | ||
| 1808 | enqueue_task(rq, p, flags); | 1964 | enqueue_task(rq, p, flags); | 
| 1809 | inc_nr_running(rq); | ||
| 1810 | } | 1965 | } | 
| 1811 | 1966 | ||
| 1812 | /* | 1967 | /* | 
| @@ -1818,7 +1973,6 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1818 | rq->nr_uninterruptible++; | 1973 | rq->nr_uninterruptible++; | 
| 1819 | 1974 | ||
| 1820 | dequeue_task(rq, p, flags); | 1975 | dequeue_task(rq, p, flags); | 
| 1821 | dec_nr_running(rq); | ||
| 1822 | } | 1976 | } | 
| 1823 | 1977 | ||
| 1824 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 1978 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 
| @@ -2390,11 +2544,11 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
| 2390 | 2544 | ||
| 2391 | /* Look for allowed, online CPU in same node. */ | 2545 | /* Look for allowed, online CPU in same node. */ | 
| 2392 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | 2546 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | 
| 2393 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 2547 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 
| 2394 | return dest_cpu; | 2548 | return dest_cpu; | 
| 2395 | 2549 | ||
| 2396 | /* Any allowed, online CPU? */ | 2550 | /* Any allowed, online CPU? */ | 
| 2397 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | 2551 | dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); | 
| 2398 | if (dest_cpu < nr_cpu_ids) | 2552 | if (dest_cpu < nr_cpu_ids) | 
| 2399 | return dest_cpu; | 2553 | return dest_cpu; | 
| 2400 | 2554 | ||
| @@ -2431,7 +2585,7 @@ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | |||
| 2431 | * [ this allows ->select_task() to simply return task_cpu(p) and | 2585 | * [ this allows ->select_task() to simply return task_cpu(p) and | 
| 2432 | * not worry about this generic constraint ] | 2586 | * not worry about this generic constraint ] | 
| 2433 | */ | 2587 | */ | 
| 2434 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | 2588 | if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || | 
| 2435 | !cpu_online(cpu))) | 2589 | !cpu_online(cpu))) | 
| 2436 | cpu = select_fallback_rq(task_cpu(p), p); | 2590 | cpu = select_fallback_rq(task_cpu(p), p); | 
| 2437 | 2591 | ||
| @@ -2556,42 +2710,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) | |||
| 2556 | } | 2710 | } | 
| 2557 | 2711 | ||
| 2558 | #ifdef CONFIG_SMP | 2712 | #ifdef CONFIG_SMP | 
| 2559 | static void sched_ttwu_do_pending(struct task_struct *list) | 2713 | static void sched_ttwu_pending(void) | 
| 2560 | { | 2714 | { | 
| 2561 | struct rq *rq = this_rq(); | 2715 | struct rq *rq = this_rq(); | 
| 2716 | struct llist_node *llist = llist_del_all(&rq->wake_list); | ||
| 2717 | struct task_struct *p; | ||
| 2562 | 2718 | ||
| 2563 | raw_spin_lock(&rq->lock); | 2719 | raw_spin_lock(&rq->lock); | 
| 2564 | 2720 | ||
| 2565 | while (list) { | 2721 | while (llist) { | 
| 2566 | struct task_struct *p = list; | 2722 | p = llist_entry(llist, struct task_struct, wake_entry); | 
| 2567 | list = list->wake_entry; | 2723 | llist = llist_next(llist); | 
| 2568 | ttwu_do_activate(rq, p, 0); | 2724 | ttwu_do_activate(rq, p, 0); | 
| 2569 | } | 2725 | } | 
| 2570 | 2726 | ||
| 2571 | raw_spin_unlock(&rq->lock); | 2727 | raw_spin_unlock(&rq->lock); | 
| 2572 | } | 2728 | } | 
| 2573 | 2729 | ||
| 2574 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 2575 | |||
| 2576 | static void sched_ttwu_pending(void) | ||
| 2577 | { | ||
| 2578 | struct rq *rq = this_rq(); | ||
| 2579 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2580 | |||
| 2581 | if (!list) | ||
| 2582 | return; | ||
| 2583 | |||
| 2584 | sched_ttwu_do_pending(list); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
| 2588 | |||
| 2589 | void scheduler_ipi(void) | 2730 | void scheduler_ipi(void) | 
| 2590 | { | 2731 | { | 
| 2591 | struct rq *rq = this_rq(); | 2732 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) | 
| 2592 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2593 | |||
| 2594 | if (!list) | ||
| 2595 | return; | 2733 | return; | 
| 2596 | 2734 | ||
| 2597 | /* | 2735 | /* | 
| @@ -2608,25 +2746,21 @@ void scheduler_ipi(void) | |||
| 2608 | * somewhat pessimize the simple resched case. | 2746 | * somewhat pessimize the simple resched case. | 
| 2609 | */ | 2747 | */ | 
| 2610 | irq_enter(); | 2748 | irq_enter(); | 
| 2611 | sched_ttwu_do_pending(list); | 2749 | sched_ttwu_pending(); | 
| 2750 | |||
| 2751 | /* | ||
| 2752 | * Check if someone kicked us for doing the nohz idle load balance. | ||
| 2753 | */ | ||
| 2754 | if (unlikely(got_nohz_idle_kick() && !need_resched())) { | ||
| 2755 | this_rq()->idle_balance = 1; | ||
| 2756 | raise_softirq_irqoff(SCHED_SOFTIRQ); | ||
| 2757 | } | ||
| 2612 | irq_exit(); | 2758 | irq_exit(); | 
| 2613 | } | 2759 | } | 
| 2614 | 2760 | ||
| 2615 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | 2761 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | 
| 2616 | { | 2762 | { | 
| 2617 | struct rq *rq = cpu_rq(cpu); | 2763 | if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) | 
| 2618 | struct task_struct *next = rq->wake_list; | ||
| 2619 | |||
| 2620 | for (;;) { | ||
| 2621 | struct task_struct *old = next; | ||
| 2622 | |||
| 2623 | p->wake_entry = next; | ||
| 2624 | next = cmpxchg(&rq->wake_list, old, p); | ||
| 2625 | if (next == old) | ||
| 2626 | break; | ||
| 2627 | } | ||
| 2628 | |||
| 2629 | if (!next) | ||
| 2630 | smp_send_reschedule(cpu); | 2764 | smp_send_reschedule(cpu); | 
| 2631 | } | 2765 | } | 
| 2632 | 2766 | ||
| @@ -2848,19 +2982,23 @@ void sched_fork(struct task_struct *p) | |||
| 2848 | p->state = TASK_RUNNING; | 2982 | p->state = TASK_RUNNING; | 
| 2849 | 2983 | ||
| 2850 | /* | 2984 | /* | 
| 2985 | * Make sure we do not leak PI boosting priority to the child. | ||
| 2986 | */ | ||
| 2987 | p->prio = current->normal_prio; | ||
| 2988 | |||
| 2989 | /* | ||
| 2851 | * Revert to default priority/policy on fork if requested. | 2990 | * Revert to default priority/policy on fork if requested. | 
| 2852 | */ | 2991 | */ | 
| 2853 | if (unlikely(p->sched_reset_on_fork)) { | 2992 | if (unlikely(p->sched_reset_on_fork)) { | 
| 2854 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { | 2993 | if (task_has_rt_policy(p)) { | 
| 2855 | p->policy = SCHED_NORMAL; | 2994 | p->policy = SCHED_NORMAL; | 
| 2856 | p->normal_prio = p->static_prio; | ||
| 2857 | } | ||
| 2858 | |||
| 2859 | if (PRIO_TO_NICE(p->static_prio) < 0) { | ||
| 2860 | p->static_prio = NICE_TO_PRIO(0); | 2995 | p->static_prio = NICE_TO_PRIO(0); | 
| 2861 | p->normal_prio = p->static_prio; | 2996 | p->rt_priority = 0; | 
| 2862 | set_load_weight(p); | 2997 | } else if (PRIO_TO_NICE(p->static_prio) < 0) | 
| 2863 | } | 2998 | p->static_prio = NICE_TO_PRIO(0); | 
| 2999 | |||
| 3000 | p->prio = p->normal_prio = __normal_prio(p); | ||
| 3001 | set_load_weight(p); | ||
| 2864 | 3002 | ||
| 2865 | /* | 3003 | /* | 
| 2866 | * We don't need the reset flag anymore after the fork. It has | 3004 | * We don't need the reset flag anymore after the fork. It has | 
| @@ -2869,11 +3007,6 @@ void sched_fork(struct task_struct *p) | |||
| 2869 | p->sched_reset_on_fork = 0; | 3007 | p->sched_reset_on_fork = 0; | 
| 2870 | } | 3008 | } | 
| 2871 | 3009 | ||
| 2872 | /* | ||
| 2873 | * Make sure we do not leak PI boosting priority to the child. | ||
| 2874 | */ | ||
| 2875 | p->prio = current->normal_prio; | ||
| 2876 | |||
| 2877 | if (!rt_prio(p->prio)) | 3010 | if (!rt_prio(p->prio)) | 
| 2878 | p->sched_class = &fair_sched_class; | 3011 | p->sched_class = &fair_sched_class; | 
| 2879 | 3012 | ||
| @@ -3065,7 +3198,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 3065 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 3198 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 
| 3066 | local_irq_disable(); | 3199 | local_irq_disable(); | 
| 3067 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 3200 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 
| 3068 | perf_event_task_sched_in(current); | 3201 | perf_event_task_sched_in(prev, current); | 
| 3069 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 3202 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 
| 3070 | local_irq_enable(); | 3203 | local_irq_enable(); | 
| 3071 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 3204 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 
| @@ -3725,30 +3858,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
| 3725 | } | 3858 | } | 
| 3726 | 3859 | ||
| 3727 | /* | 3860 | /* | 
| 3728 | * Return sum_exec_runtime for the thread group. | ||
| 3729 | * In case the task is currently running, return the sum plus current's | ||
| 3730 | * pending runtime that have not been accounted yet. | ||
| 3731 | * | ||
| 3732 | * Note that the thread group might have other running tasks as well, | ||
| 3733 | * so the return value not includes other pending runtime that other | ||
| 3734 | * running tasks might have. | ||
| 3735 | */ | ||
| 3736 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
| 3737 | { | ||
| 3738 | struct task_cputime totals; | ||
| 3739 | unsigned long flags; | ||
| 3740 | struct rq *rq; | ||
| 3741 | u64 ns; | ||
| 3742 | |||
| 3743 | rq = task_rq_lock(p, &flags); | ||
| 3744 | thread_group_cputime(p, &totals); | ||
| 3745 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
| 3746 | task_rq_unlock(rq, p, &flags); | ||
| 3747 | |||
| 3748 | return ns; | ||
| 3749 | } | ||
| 3750 | |||
| 3751 | /* | ||
| 3752 | * Account user cpu time to a process. | 3861 | * Account user cpu time to a process. | 
| 3753 | * @p: the process that the cpu time gets accounted to | 3862 | * @p: the process that the cpu time gets accounted to | 
| 3754 | * @cputime: the cpu time spent in user space since the last update | 3863 | * @cputime: the cpu time spent in user space since the last update | 
| @@ -4140,7 +4249,7 @@ void scheduler_tick(void) | |||
| 4140 | perf_event_task_tick(); | 4249 | perf_event_task_tick(); | 
| 4141 | 4250 | ||
| 4142 | #ifdef CONFIG_SMP | 4251 | #ifdef CONFIG_SMP | 
| 4143 | rq->idle_at_tick = idle_cpu(cpu); | 4252 | rq->idle_balance = idle_cpu(cpu); | 
| 4144 | trigger_load_balance(rq, cpu); | 4253 | trigger_load_balance(rq, cpu); | 
| 4145 | #endif | 4254 | #endif | 
| 4146 | } | 4255 | } | 
| @@ -4237,6 +4346,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 4237 | */ | 4346 | */ | 
| 4238 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 4347 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) | 
| 4239 | __schedule_bug(prev); | 4348 | __schedule_bug(prev); | 
| 4349 | rcu_sleep_check(); | ||
| 4240 | 4350 | ||
| 4241 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4351 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 
| 4242 | 4352 | ||
| @@ -4263,7 +4373,7 @@ pick_next_task(struct rq *rq) | |||
| 4263 | * Optimization: we know that if all tasks are in | 4373 | * Optimization: we know that if all tasks are in | 
| 4264 | * the fair class we can call that function directly: | 4374 | * the fair class we can call that function directly: | 
| 4265 | */ | 4375 | */ | 
| 4266 | if (likely(rq->nr_running == rq->cfs.nr_running)) { | 4376 | if (likely(rq->nr_running == rq->cfs.h_nr_running)) { | 
| 4267 | p = fair_sched_class.pick_next_task(rq); | 4377 | p = fair_sched_class.pick_next_task(rq); | 
| 4268 | if (likely(p)) | 4378 | if (likely(p)) | 
| 4269 | return p; | 4379 | return p; | 
| @@ -4279,9 +4389,9 @@ pick_next_task(struct rq *rq) | |||
| 4279 | } | 4389 | } | 
| 4280 | 4390 | ||
| 4281 | /* | 4391 | /* | 
| 4282 | * schedule() is the main scheduler function. | 4392 | * __schedule() is the main scheduler function. | 
| 4283 | */ | 4393 | */ | 
| 4284 | asmlinkage void __sched schedule(void) | 4394 | static void __sched __schedule(void) | 
| 4285 | { | 4395 | { | 
| 4286 | struct task_struct *prev, *next; | 4396 | struct task_struct *prev, *next; | 
| 4287 | unsigned long *switch_count; | 4397 | unsigned long *switch_count; | 
| @@ -4322,16 +4432,6 @@ need_resched: | |||
| 4322 | if (to_wakeup) | 4432 | if (to_wakeup) | 
| 4323 | try_to_wake_up_local(to_wakeup); | 4433 | try_to_wake_up_local(to_wakeup); | 
| 4324 | } | 4434 | } | 
| 4325 | |||
| 4326 | /* | ||
| 4327 | * If we are going to sleep and we have plugged IO | ||
| 4328 | * queued, make sure to submit it to avoid deadlocks. | ||
| 4329 | */ | ||
| 4330 | if (blk_needs_flush_plug(prev)) { | ||
| 4331 | raw_spin_unlock(&rq->lock); | ||
| 4332 | blk_schedule_flush_plug(prev); | ||
| 4333 | raw_spin_lock(&rq->lock); | ||
| 4334 | } | ||
| 4335 | } | 4435 | } | 
| 4336 | switch_count = &prev->nvcsw; | 4436 | switch_count = &prev->nvcsw; | 
| 4337 | } | 4437 | } | 
| @@ -4369,6 +4469,26 @@ need_resched: | |||
| 4369 | if (need_resched()) | 4469 | if (need_resched()) | 
| 4370 | goto need_resched; | 4470 | goto need_resched; | 
| 4371 | } | 4471 | } | 
| 4472 | |||
| 4473 | static inline void sched_submit_work(struct task_struct *tsk) | ||
| 4474 | { | ||
| 4475 | if (!tsk->state) | ||
| 4476 | return; | ||
| 4477 | /* | ||
| 4478 | * If we are going to sleep and we have plugged IO queued, | ||
| 4479 | * make sure to submit it to avoid deadlocks. | ||
| 4480 | */ | ||
| 4481 | if (blk_needs_flush_plug(tsk)) | ||
| 4482 | blk_schedule_flush_plug(tsk); | ||
| 4483 | } | ||
| 4484 | |||
| 4485 | asmlinkage void __sched schedule(void) | ||
| 4486 | { | ||
| 4487 | struct task_struct *tsk = current; | ||
| 4488 | |||
| 4489 | sched_submit_work(tsk); | ||
| 4490 | __schedule(); | ||
| 4491 | } | ||
| 4372 | EXPORT_SYMBOL(schedule); | 4492 | EXPORT_SYMBOL(schedule); | 
| 4373 | 4493 | ||
| 4374 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4494 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 
| @@ -4435,7 +4555,7 @@ asmlinkage void __sched notrace preempt_schedule(void) | |||
| 4435 | 4555 | ||
| 4436 | do { | 4556 | do { | 
| 4437 | add_preempt_count_notrace(PREEMPT_ACTIVE); | 4557 | add_preempt_count_notrace(PREEMPT_ACTIVE); | 
| 4438 | schedule(); | 4558 | __schedule(); | 
| 4439 | sub_preempt_count_notrace(PREEMPT_ACTIVE); | 4559 | sub_preempt_count_notrace(PREEMPT_ACTIVE); | 
| 4440 | 4560 | ||
| 4441 | /* | 4561 | /* | 
| @@ -4463,7 +4583,7 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
| 4463 | do { | 4583 | do { | 
| 4464 | add_preempt_count(PREEMPT_ACTIVE); | 4584 | add_preempt_count(PREEMPT_ACTIVE); | 
| 4465 | local_irq_enable(); | 4585 | local_irq_enable(); | 
| 4466 | schedule(); | 4586 | __schedule(); | 
| 4467 | local_irq_disable(); | 4587 | local_irq_disable(); | 
| 4468 | sub_preempt_count(PREEMPT_ACTIVE); | 4588 | sub_preempt_count(PREEMPT_ACTIVE); | 
| 4469 | 4589 | ||
| @@ -5039,7 +5159,20 @@ EXPORT_SYMBOL(task_nice); | |||
| 5039 | */ | 5159 | */ | 
| 5040 | int idle_cpu(int cpu) | 5160 | int idle_cpu(int cpu) | 
| 5041 | { | 5161 | { | 
| 5042 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 5162 | struct rq *rq = cpu_rq(cpu); | 
| 5163 | |||
| 5164 | if (rq->curr != rq->idle) | ||
| 5165 | return 0; | ||
| 5166 | |||
| 5167 | if (rq->nr_running) | ||
| 5168 | return 0; | ||
| 5169 | |||
| 5170 | #ifdef CONFIG_SMP | ||
| 5171 | if (!llist_empty(&rq->wake_list)) | ||
| 5172 | return 0; | ||
| 5173 | #endif | ||
| 5174 | |||
| 5175 | return 1; | ||
| 5043 | } | 5176 | } | 
| 5044 | 5177 | ||
| 5045 | /** | 5178 | /** | 
| @@ -5588,7 +5721,7 @@ static inline int should_resched(void) | |||
| 5588 | static void __cond_resched(void) | 5721 | static void __cond_resched(void) | 
| 5589 | { | 5722 | { | 
| 5590 | add_preempt_count(PREEMPT_ACTIVE); | 5723 | add_preempt_count(PREEMPT_ACTIVE); | 
| 5591 | schedule(); | 5724 | __schedule(); | 
| 5592 | sub_preempt_count(PREEMPT_ACTIVE); | 5725 | sub_preempt_count(PREEMPT_ACTIVE); | 
| 5593 | } | 5726 | } | 
| 5594 | 5727 | ||
| @@ -5889,7 +6022,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 5889 | printk(KERN_INFO | 6022 | printk(KERN_INFO | 
| 5890 | " task PC stack pid father\n"); | 6023 | " task PC stack pid father\n"); | 
| 5891 | #endif | 6024 | #endif | 
| 5892 | read_lock(&tasklist_lock); | 6025 | rcu_read_lock(); | 
| 5893 | do_each_thread(g, p) { | 6026 | do_each_thread(g, p) { | 
| 5894 | /* | 6027 | /* | 
| 5895 | * reset the NMI-timeout, listing all files on a slow | 6028 | * reset the NMI-timeout, listing all files on a slow | 
| @@ -5905,7 +6038,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 5905 | #ifdef CONFIG_SCHED_DEBUG | 6038 | #ifdef CONFIG_SCHED_DEBUG | 
| 5906 | sysrq_sched_debug_show(); | 6039 | sysrq_sched_debug_show(); | 
| 5907 | #endif | 6040 | #endif | 
| 5908 | read_unlock(&tasklist_lock); | 6041 | rcu_read_unlock(); | 
| 5909 | /* | 6042 | /* | 
| 5910 | * Only show locks if all tasks are dumped: | 6043 | * Only show locks if all tasks are dumped: | 
| 5911 | */ | 6044 | */ | 
| @@ -5969,15 +6102,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
| 5969 | } | 6102 | } | 
| 5970 | 6103 | ||
| 5971 | /* | 6104 | /* | 
| 5972 | * In a system that switches off the HZ timer nohz_cpu_mask | ||
| 5973 | * indicates which cpus entered this state. This is used | ||
| 5974 | * in the rcu update to wait only for active cpus. For system | ||
| 5975 | * which do not switch off the HZ timer nohz_cpu_mask should | ||
| 5976 | * always be CPU_BITS_NONE. | ||
| 5977 | */ | ||
| 5978 | cpumask_var_t nohz_cpu_mask; | ||
| 5979 | |||
| 5980 | /* | ||
| 5981 | * Increase the granularity value when there are more CPUs, | 6105 | * Increase the granularity value when there are more CPUs, | 
| 5982 | * because with more CPUs the 'effective latency' as visible | 6106 | * because with more CPUs the 'effective latency' as visible | 
| 5983 | * to users decreases. But the relationship is not linear, | 6107 | * to users decreases. But the relationship is not linear, | 
| @@ -6029,10 +6153,9 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |||
| 6029 | { | 6153 | { | 
| 6030 | if (p->sched_class && p->sched_class->set_cpus_allowed) | 6154 | if (p->sched_class && p->sched_class->set_cpus_allowed) | 
| 6031 | p->sched_class->set_cpus_allowed(p, new_mask); | 6155 | p->sched_class->set_cpus_allowed(p, new_mask); | 
| 6032 | else { | 6156 | |
| 6033 | cpumask_copy(&p->cpus_allowed, new_mask); | 6157 | cpumask_copy(&p->cpus_allowed, new_mask); | 
| 6034 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | 6158 | p->rt.nr_cpus_allowed = cpumask_weight(new_mask); | 
| 6035 | } | ||
| 6036 | } | 6159 | } | 
| 6037 | 6160 | ||
| 6038 | /* | 6161 | /* | 
| @@ -6130,7 +6253,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 6130 | if (task_cpu(p) != src_cpu) | 6253 | if (task_cpu(p) != src_cpu) | 
| 6131 | goto done; | 6254 | goto done; | 
| 6132 | /* Affinity changed (again). */ | 6255 | /* Affinity changed (again). */ | 
| 6133 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 6256 | if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 
| 6134 | goto fail; | 6257 | goto fail; | 
| 6135 | 6258 | ||
| 6136 | /* | 6259 | /* | 
| @@ -6211,6 +6334,30 @@ static void calc_global_load_remove(struct rq *rq) | |||
| 6211 | rq->calc_load_active = 0; | 6334 | rq->calc_load_active = 0; | 
| 6212 | } | 6335 | } | 
| 6213 | 6336 | ||
| 6337 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 6338 | static void unthrottle_offline_cfs_rqs(struct rq *rq) | ||
| 6339 | { | ||
| 6340 | struct cfs_rq *cfs_rq; | ||
| 6341 | |||
| 6342 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
| 6343 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
| 6344 | |||
| 6345 | if (!cfs_rq->runtime_enabled) | ||
| 6346 | continue; | ||
| 6347 | |||
| 6348 | /* | ||
| 6349 | * clock_task is not advancing so we just need to make sure | ||
| 6350 | * there's some valid quota amount | ||
| 6351 | */ | ||
| 6352 | cfs_rq->runtime_remaining = cfs_b->quota; | ||
| 6353 | if (cfs_rq_throttled(cfs_rq)) | ||
| 6354 | unthrottle_cfs_rq(cfs_rq); | ||
| 6355 | } | ||
| 6356 | } | ||
| 6357 | #else | ||
| 6358 | static void unthrottle_offline_cfs_rqs(struct rq *rq) {} | ||
| 6359 | #endif | ||
| 6360 | |||
| 6214 | /* | 6361 | /* | 
| 6215 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 6362 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 
| 6216 | * try_to_wake_up()->select_task_rq(). | 6363 | * try_to_wake_up()->select_task_rq(). | 
| @@ -6236,6 +6383,9 @@ static void migrate_tasks(unsigned int dead_cpu) | |||
| 6236 | */ | 6383 | */ | 
| 6237 | rq->stop = NULL; | 6384 | rq->stop = NULL; | 
| 6238 | 6385 | ||
| 6386 | /* Ensure any throttled groups are reachable by pick_next_task */ | ||
| 6387 | unthrottle_offline_cfs_rqs(rq); | ||
| 6388 | |||
| 6239 | for ( ; ; ) { | 6389 | for ( ; ; ) { | 
| 6240 | /* | 6390 | /* | 
| 6241 | * There's this thread running, bail when that's the only | 6391 | * There's this thread running, bail when that's the only | 
| @@ -7443,6 +7593,7 @@ static void __sdt_free(const struct cpumask *cpu_map) | |||
| 7443 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); | 7593 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); | 
| 7444 | if (sd && (sd->flags & SD_OVERLAP)) | 7594 | if (sd && (sd->flags & SD_OVERLAP)) | 
| 7445 | free_sched_groups(sd->groups, 0); | 7595 | free_sched_groups(sd->groups, 0); | 
| 7596 | kfree(*per_cpu_ptr(sdd->sd, j)); | ||
| 7446 | kfree(*per_cpu_ptr(sdd->sg, j)); | 7597 | kfree(*per_cpu_ptr(sdd->sg, j)); | 
| 7447 | kfree(*per_cpu_ptr(sdd->sgp, j)); | 7598 | kfree(*per_cpu_ptr(sdd->sgp, j)); | 
| 7448 | } | 7599 | } | 
| @@ -7978,6 +8129,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
| 7978 | /* allow initial update_cfs_load() to truncate */ | 8129 | /* allow initial update_cfs_load() to truncate */ | 
| 7979 | cfs_rq->load_stamp = 1; | 8130 | cfs_rq->load_stamp = 1; | 
| 7980 | #endif | 8131 | #endif | 
| 8132 | init_cfs_rq_runtime(cfs_rq); | ||
| 7981 | 8133 | ||
| 7982 | tg->cfs_rq[cpu] = cfs_rq; | 8134 | tg->cfs_rq[cpu] = cfs_rq; | 
| 7983 | tg->se[cpu] = se; | 8135 | tg->se[cpu] = se; | 
| @@ -8117,6 +8269,7 @@ void __init sched_init(void) | |||
| 8117 | * We achieve this by letting root_task_group's tasks sit | 8269 | * We achieve this by letting root_task_group's tasks sit | 
| 8118 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). | 8270 | * directly in rq->cfs (i.e root_task_group->se[] = NULL). | 
| 8119 | */ | 8271 | */ | 
| 8272 | init_cfs_bandwidth(&root_task_group.cfs_bandwidth); | ||
| 8120 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); | 8273 | init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL); | 
| 8121 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 8274 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 
| 8122 | 8275 | ||
| @@ -8146,7 +8299,6 @@ void __init sched_init(void) | |||
| 8146 | rq_attach_root(rq, &def_root_domain); | 8299 | rq_attach_root(rq, &def_root_domain); | 
| 8147 | #ifdef CONFIG_NO_HZ | 8300 | #ifdef CONFIG_NO_HZ | 
| 8148 | rq->nohz_balance_kick = 0; | 8301 | rq->nohz_balance_kick = 0; | 
| 8149 | init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i)); | ||
| 8150 | #endif | 8302 | #endif | 
| 8151 | #endif | 8303 | #endif | 
| 8152 | init_rq_hrtick(rq); | 8304 | init_rq_hrtick(rq); | 
| @@ -8188,8 +8340,6 @@ void __init sched_init(void) | |||
| 8188 | */ | 8340 | */ | 
| 8189 | current->sched_class = &fair_sched_class; | 8341 | current->sched_class = &fair_sched_class; | 
| 8190 | 8342 | ||
| 8191 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | ||
| 8192 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | ||
| 8193 | #ifdef CONFIG_SMP | 8343 | #ifdef CONFIG_SMP | 
| 8194 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 8344 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 
| 8195 | #ifdef CONFIG_NO_HZ | 8345 | #ifdef CONFIG_NO_HZ | 
| @@ -8219,6 +8369,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 8219 | { | 8369 | { | 
| 8220 | static unsigned long prev_jiffy; /* ratelimiting */ | 8370 | static unsigned long prev_jiffy; /* ratelimiting */ | 
| 8221 | 8371 | ||
| 8372 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | ||
| 8222 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8373 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 
| 8223 | system_state != SYSTEM_RUNNING || oops_in_progress) | 8374 | system_state != SYSTEM_RUNNING || oops_in_progress) | 
| 8224 | return; | 8375 | return; | 
| @@ -8358,6 +8509,8 @@ static void free_fair_sched_group(struct task_group *tg) | |||
| 8358 | { | 8509 | { | 
| 8359 | int i; | 8510 | int i; | 
| 8360 | 8511 | ||
| 8512 | destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
| 8513 | |||
| 8361 | for_each_possible_cpu(i) { | 8514 | for_each_possible_cpu(i) { | 
| 8362 | if (tg->cfs_rq) | 8515 | if (tg->cfs_rq) | 
| 8363 | kfree(tg->cfs_rq[i]); | 8516 | kfree(tg->cfs_rq[i]); | 
| @@ -8385,6 +8538,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8385 | 8538 | ||
| 8386 | tg->shares = NICE_0_LOAD; | 8539 | tg->shares = NICE_0_LOAD; | 
| 8387 | 8540 | ||
| 8541 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
| 8542 | |||
| 8388 | for_each_possible_cpu(i) { | 8543 | for_each_possible_cpu(i) { | 
| 8389 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 8544 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 
| 8390 | GFP_KERNEL, cpu_to_node(i)); | 8545 | GFP_KERNEL, cpu_to_node(i)); | 
| @@ -8660,12 +8815,7 @@ unsigned long sched_group_shares(struct task_group *tg) | |||
| 8660 | } | 8815 | } | 
| 8661 | #endif | 8816 | #endif | 
| 8662 | 8817 | ||
| 8663 | #ifdef CONFIG_RT_GROUP_SCHED | 8818 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) | 
| 8664 | /* | ||
| 8665 | * Ensure that the real time constraints are schedulable. | ||
| 8666 | */ | ||
| 8667 | static DEFINE_MUTEX(rt_constraints_mutex); | ||
| 8668 | |||
| 8669 | static unsigned long to_ratio(u64 period, u64 runtime) | 8819 | static unsigned long to_ratio(u64 period, u64 runtime) | 
| 8670 | { | 8820 | { | 
| 8671 | if (runtime == RUNTIME_INF) | 8821 | if (runtime == RUNTIME_INF) | 
| @@ -8673,6 +8823,13 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
| 8673 | 8823 | ||
| 8674 | return div64_u64(runtime << 20, period); | 8824 | return div64_u64(runtime << 20, period); | 
| 8675 | } | 8825 | } | 
| 8826 | #endif | ||
| 8827 | |||
| 8828 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 8829 | /* | ||
| 8830 | * Ensure that the real time constraints are schedulable. | ||
| 8831 | */ | ||
| 8832 | static DEFINE_MUTEX(rt_constraints_mutex); | ||
| 8676 | 8833 | ||
| 8677 | /* Must be called with tasklist_lock held */ | 8834 | /* Must be called with tasklist_lock held */ | 
| 8678 | static inline int tg_has_rt_tasks(struct task_group *tg) | 8835 | static inline int tg_has_rt_tasks(struct task_group *tg) | 
| @@ -8693,7 +8850,7 @@ struct rt_schedulable_data { | |||
| 8693 | u64 rt_runtime; | 8850 | u64 rt_runtime; | 
| 8694 | }; | 8851 | }; | 
| 8695 | 8852 | ||
| 8696 | static int tg_schedulable(struct task_group *tg, void *data) | 8853 | static int tg_rt_schedulable(struct task_group *tg, void *data) | 
| 8697 | { | 8854 | { | 
| 8698 | struct rt_schedulable_data *d = data; | 8855 | struct rt_schedulable_data *d = data; | 
| 8699 | struct task_group *child; | 8856 | struct task_group *child; | 
| @@ -8751,16 +8908,22 @@ static int tg_schedulable(struct task_group *tg, void *data) | |||
| 8751 | 8908 | ||
| 8752 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8909 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 
| 8753 | { | 8910 | { | 
| 8911 | int ret; | ||
| 8912 | |||
| 8754 | struct rt_schedulable_data data = { | 8913 | struct rt_schedulable_data data = { | 
| 8755 | .tg = tg, | 8914 | .tg = tg, | 
| 8756 | .rt_period = period, | 8915 | .rt_period = period, | 
| 8757 | .rt_runtime = runtime, | 8916 | .rt_runtime = runtime, | 
| 8758 | }; | 8917 | }; | 
| 8759 | 8918 | ||
| 8760 | return walk_tg_tree(tg_schedulable, tg_nop, &data); | 8919 | rcu_read_lock(); | 
| 8920 | ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data); | ||
| 8921 | rcu_read_unlock(); | ||
| 8922 | |||
| 8923 | return ret; | ||
| 8761 | } | 8924 | } | 
| 8762 | 8925 | ||
| 8763 | static int tg_set_bandwidth(struct task_group *tg, | 8926 | static int tg_set_rt_bandwidth(struct task_group *tg, | 
| 8764 | u64 rt_period, u64 rt_runtime) | 8927 | u64 rt_period, u64 rt_runtime) | 
| 8765 | { | 8928 | { | 
| 8766 | int i, err = 0; | 8929 | int i, err = 0; | 
| @@ -8799,7 +8962,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) | |||
| 8799 | if (rt_runtime_us < 0) | 8962 | if (rt_runtime_us < 0) | 
| 8800 | rt_runtime = RUNTIME_INF; | 8963 | rt_runtime = RUNTIME_INF; | 
| 8801 | 8964 | ||
| 8802 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 8965 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 
| 8803 | } | 8966 | } | 
| 8804 | 8967 | ||
| 8805 | long sched_group_rt_runtime(struct task_group *tg) | 8968 | long sched_group_rt_runtime(struct task_group *tg) | 
| @@ -8824,7 +8987,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
| 8824 | if (rt_period == 0) | 8987 | if (rt_period == 0) | 
| 8825 | return -EINVAL; | 8988 | return -EINVAL; | 
| 8826 | 8989 | ||
| 8827 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 8990 | return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); | 
| 8828 | } | 8991 | } | 
| 8829 | 8992 | ||
| 8830 | long sched_group_rt_period(struct task_group *tg) | 8993 | long sched_group_rt_period(struct task_group *tg) | 
| @@ -9014,6 +9177,238 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
| 9014 | 9177 | ||
| 9015 | return (u64) scale_load_down(tg->shares); | 9178 | return (u64) scale_load_down(tg->shares); | 
| 9016 | } | 9179 | } | 
| 9180 | |||
| 9181 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 9182 | static DEFINE_MUTEX(cfs_constraints_mutex); | ||
| 9183 | |||
| 9184 | const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ | ||
| 9185 | const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ | ||
| 9186 | |||
| 9187 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); | ||
| 9188 | |||
| 9189 | static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | ||
| 9190 | { | ||
| 9191 | int i, ret = 0, runtime_enabled; | ||
| 9192 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
| 9193 | |||
| 9194 | if (tg == &root_task_group) | ||
| 9195 | return -EINVAL; | ||
| 9196 | |||
| 9197 | /* | ||
| 9198 | * Ensure we have at least some amount of bandwidth every period. This is | ||
| 9199 | * to prevent reaching a state of large arrears when throttled via | ||
| 9200 | * entity_tick() resulting in prolonged exit starvation. | ||
| 9201 | */ | ||
| 9202 | if (quota < min_cfs_quota_period || period < min_cfs_quota_period) | ||
| 9203 | return -EINVAL; | ||
| 9204 | |||
| 9205 | /* | ||
| 9206 | * Likewise, bound things on the other side by preventing insane quota | ||
| 9207 | * periods. This also allows us to normalize in computing quota | ||
| 9208 | * feasibility. | ||
| 9209 | */ | ||
| 9210 | if (period > max_cfs_quota_period) | ||
| 9211 | return -EINVAL; | ||
| 9212 | |||
| 9213 | mutex_lock(&cfs_constraints_mutex); | ||
| 9214 | ret = __cfs_schedulable(tg, period, quota); | ||
| 9215 | if (ret) | ||
| 9216 | goto out_unlock; | ||
| 9217 | |||
| 9218 | runtime_enabled = quota != RUNTIME_INF; | ||
| 9219 | raw_spin_lock_irq(&cfs_b->lock); | ||
| 9220 | cfs_b->period = ns_to_ktime(period); | ||
| 9221 | cfs_b->quota = quota; | ||
| 9222 | |||
| 9223 | __refill_cfs_bandwidth_runtime(cfs_b); | ||
| 9224 | /* restart the period timer (if active) to handle new period expiry */ | ||
| 9225 | if (runtime_enabled && cfs_b->timer_active) { | ||
| 9226 | /* force a reprogram */ | ||
| 9227 | cfs_b->timer_active = 0; | ||
| 9228 | __start_cfs_bandwidth(cfs_b); | ||
| 9229 | } | ||
| 9230 | raw_spin_unlock_irq(&cfs_b->lock); | ||
| 9231 | |||
| 9232 | for_each_possible_cpu(i) { | ||
| 9233 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; | ||
| 9234 | struct rq *rq = rq_of(cfs_rq); | ||
| 9235 | |||
| 9236 | raw_spin_lock_irq(&rq->lock); | ||
| 9237 | cfs_rq->runtime_enabled = runtime_enabled; | ||
| 9238 | cfs_rq->runtime_remaining = 0; | ||
| 9239 | |||
| 9240 | if (cfs_rq_throttled(cfs_rq)) | ||
| 9241 | unthrottle_cfs_rq(cfs_rq); | ||
| 9242 | raw_spin_unlock_irq(&rq->lock); | ||
| 9243 | } | ||
| 9244 | out_unlock: | ||
| 9245 | mutex_unlock(&cfs_constraints_mutex); | ||
| 9246 | |||
| 9247 | return ret; | ||
| 9248 | } | ||
| 9249 | |||
| 9250 | int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) | ||
| 9251 | { | ||
| 9252 | u64 quota, period; | ||
| 9253 | |||
| 9254 | period = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | ||
| 9255 | if (cfs_quota_us < 0) | ||
| 9256 | quota = RUNTIME_INF; | ||
| 9257 | else | ||
| 9258 | quota = (u64)cfs_quota_us * NSEC_PER_USEC; | ||
| 9259 | |||
| 9260 | return tg_set_cfs_bandwidth(tg, period, quota); | ||
| 9261 | } | ||
| 9262 | |||
| 9263 | long tg_get_cfs_quota(struct task_group *tg) | ||
| 9264 | { | ||
| 9265 | u64 quota_us; | ||
| 9266 | |||
| 9267 | if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF) | ||
| 9268 | return -1; | ||
| 9269 | |||
| 9270 | quota_us = tg_cfs_bandwidth(tg)->quota; | ||
| 9271 | do_div(quota_us, NSEC_PER_USEC); | ||
| 9272 | |||
| 9273 | return quota_us; | ||
| 9274 | } | ||
| 9275 | |||
| 9276 | int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) | ||
| 9277 | { | ||
| 9278 | u64 quota, period; | ||
| 9279 | |||
| 9280 | period = (u64)cfs_period_us * NSEC_PER_USEC; | ||
| 9281 | quota = tg_cfs_bandwidth(tg)->quota; | ||
| 9282 | |||
| 9283 | if (period <= 0) | ||
| 9284 | return -EINVAL; | ||
| 9285 | |||
| 9286 | return tg_set_cfs_bandwidth(tg, period, quota); | ||
| 9287 | } | ||
| 9288 | |||
| 9289 | long tg_get_cfs_period(struct task_group *tg) | ||
| 9290 | { | ||
| 9291 | u64 cfs_period_us; | ||
| 9292 | |||
| 9293 | cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | ||
| 9294 | do_div(cfs_period_us, NSEC_PER_USEC); | ||
| 9295 | |||
| 9296 | return cfs_period_us; | ||
| 9297 | } | ||
| 9298 | |||
| 9299 | static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft) | ||
| 9300 | { | ||
| 9301 | return tg_get_cfs_quota(cgroup_tg(cgrp)); | ||
| 9302 | } | ||
| 9303 | |||
| 9304 | static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype, | ||
| 9305 | s64 cfs_quota_us) | ||
| 9306 | { | ||
| 9307 | return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us); | ||
| 9308 | } | ||
| 9309 | |||
| 9310 | static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft) | ||
| 9311 | { | ||
| 9312 | return tg_get_cfs_period(cgroup_tg(cgrp)); | ||
| 9313 | } | ||
| 9314 | |||
| 9315 | static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, | ||
| 9316 | u64 cfs_period_us) | ||
| 9317 | { | ||
| 9318 | return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); | ||
| 9319 | } | ||
| 9320 | |||
| 9321 | struct cfs_schedulable_data { | ||
| 9322 | struct task_group *tg; | ||
| 9323 | u64 period, quota; | ||
| 9324 | }; | ||
| 9325 | |||
| 9326 | /* | ||
| 9327 | * normalize group quota/period to be quota/max_period | ||
| 9328 | * note: units are usecs | ||
| 9329 | */ | ||
| 9330 | static u64 normalize_cfs_quota(struct task_group *tg, | ||
| 9331 | struct cfs_schedulable_data *d) | ||
| 9332 | { | ||
| 9333 | u64 quota, period; | ||
| 9334 | |||
| 9335 | if (tg == d->tg) { | ||
| 9336 | period = d->period; | ||
| 9337 | quota = d->quota; | ||
| 9338 | } else { | ||
| 9339 | period = tg_get_cfs_period(tg); | ||
| 9340 | quota = tg_get_cfs_quota(tg); | ||
| 9341 | } | ||
| 9342 | |||
| 9343 | /* note: these should typically be equivalent */ | ||
| 9344 | if (quota == RUNTIME_INF || quota == -1) | ||
| 9345 | return RUNTIME_INF; | ||
| 9346 | |||
| 9347 | return to_ratio(period, quota); | ||
| 9348 | } | ||
| 9349 | |||
| 9350 | static int tg_cfs_schedulable_down(struct task_group *tg, void *data) | ||
| 9351 | { | ||
| 9352 | struct cfs_schedulable_data *d = data; | ||
| 9353 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
| 9354 | s64 quota = 0, parent_quota = -1; | ||
| 9355 | |||
| 9356 | if (!tg->parent) { | ||
| 9357 | quota = RUNTIME_INF; | ||
| 9358 | } else { | ||
| 9359 | struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent); | ||
| 9360 | |||
| 9361 | quota = normalize_cfs_quota(tg, d); | ||
| 9362 | parent_quota = parent_b->hierarchal_quota; | ||
| 9363 | |||
| 9364 | /* | ||
| 9365 | * ensure max(child_quota) <= parent_quota, inherit when no | ||
| 9366 | * limit is set | ||
| 9367 | */ | ||
| 9368 | if (quota == RUNTIME_INF) | ||
| 9369 | quota = parent_quota; | ||
| 9370 | else if (parent_quota != RUNTIME_INF && quota > parent_quota) | ||
| 9371 | return -EINVAL; | ||
| 9372 | } | ||
| 9373 | cfs_b->hierarchal_quota = quota; | ||
| 9374 | |||
| 9375 | return 0; | ||
| 9376 | } | ||
| 9377 | |||
| 9378 | static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota) | ||
| 9379 | { | ||
| 9380 | int ret; | ||
| 9381 | struct cfs_schedulable_data data = { | ||
| 9382 | .tg = tg, | ||
| 9383 | .period = period, | ||
| 9384 | .quota = quota, | ||
| 9385 | }; | ||
| 9386 | |||
| 9387 | if (quota != RUNTIME_INF) { | ||
| 9388 | do_div(data.period, NSEC_PER_USEC); | ||
| 9389 | do_div(data.quota, NSEC_PER_USEC); | ||
| 9390 | } | ||
| 9391 | |||
| 9392 | rcu_read_lock(); | ||
| 9393 | ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data); | ||
| 9394 | rcu_read_unlock(); | ||
| 9395 | |||
| 9396 | return ret; | ||
| 9397 | } | ||
| 9398 | |||
| 9399 | static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
| 9400 | struct cgroup_map_cb *cb) | ||
| 9401 | { | ||
| 9402 | struct task_group *tg = cgroup_tg(cgrp); | ||
| 9403 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | ||
| 9404 | |||
| 9405 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | ||
| 9406 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | ||
| 9407 | cb->fill(cb, "throttled_time", cfs_b->throttled_time); | ||
| 9408 | |||
| 9409 | return 0; | ||
| 9410 | } | ||
| 9411 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
| 9017 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 9412 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 
| 9018 | 9413 | ||
| 9019 | #ifdef CONFIG_RT_GROUP_SCHED | 9414 | #ifdef CONFIG_RT_GROUP_SCHED | 
| @@ -9048,6 +9443,22 @@ static struct cftype cpu_files[] = { | |||
| 9048 | .write_u64 = cpu_shares_write_u64, | 9443 | .write_u64 = cpu_shares_write_u64, | 
| 9049 | }, | 9444 | }, | 
| 9050 | #endif | 9445 | #endif | 
| 9446 | #ifdef CONFIG_CFS_BANDWIDTH | ||
| 9447 | { | ||
| 9448 | .name = "cfs_quota_us", | ||
| 9449 | .read_s64 = cpu_cfs_quota_read_s64, | ||
| 9450 | .write_s64 = cpu_cfs_quota_write_s64, | ||
| 9451 | }, | ||
| 9452 | { | ||
| 9453 | .name = "cfs_period_us", | ||
| 9454 | .read_u64 = cpu_cfs_period_read_u64, | ||
| 9455 | .write_u64 = cpu_cfs_period_write_u64, | ||
| 9456 | }, | ||
| 9457 | { | ||
| 9458 | .name = "stat", | ||
| 9459 | .read_map = cpu_stats_show, | ||
| 9460 | }, | ||
| 9461 | #endif | ||
| 9051 | #ifdef CONFIG_RT_GROUP_SCHED | 9462 | #ifdef CONFIG_RT_GROUP_SCHED | 
| 9052 | { | 9463 | { | 
| 9053 | .name = "rt_runtime_us", | 9464 | .name = "rt_runtime_us", | 
| @@ -9357,4 +9768,3 @@ struct cgroup_subsys cpuacct_subsys = { | |||
| 9357 | .subsys_id = cpuacct_subsys_id, | 9768 | .subsys_id = cpuacct_subsys_id, | 
| 9358 | }; | 9769 | }; | 
| 9359 | #endif /* CONFIG_CGROUP_CPUACCT */ | 9770 | #endif /* CONFIG_CGROUP_CPUACCT */ | 
| 9360 | |||
