path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 923
1 file changed, 647 insertions(+), 276 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bfb8ad8ed171..99e6d850ecab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,10 +70,13 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
+#include <linux/ftrace.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
+#include "sched_cpupri.h"
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -289,15 +292,15 @@ struct task_group root_task_group;
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
-#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-#endif
-#else
+#endif /* CONFIG_RT_GROUP_SCHED */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
 #define root_task_group init_task_group
-#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -307,17 +310,20 @@ static DEFINE_SPINLOCK(task_group_lock);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
-#else
+#else /* !CONFIG_USER_SCHED */
 # define INIT_TASK_GROUP_LOAD NICE_0_LOAD
-#endif
+#endif /* CONFIG_USER_SCHED */
 
 /*
- * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems.
+ * A weight of 0 or 1 can cause arithmetics problems.
+ * A weight of a cfs_rq is the sum of weights of which entities
+ * are queued on this cfs_rq, so a weight of a entity should not be
+ * too large, so as the shares value of a task group.
  * (The default weight is 1024 - so there's no practical
  * limitation from this.)
  */
 #define MIN_SHARES 2
-#define MAX_SHARES (ULONG_MAX - 1)
+#define MAX_SHARES (1UL << 18)
 
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
@@ -360,6 +366,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 #else
 
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
 
 #endif /* CONFIG_GROUP_SCHED */
 
@@ -370,6 +380,7 @@ struct cfs_rq {
 
 	u64 exec_clock;
 	u64 min_vruntime;
+	u64 pair_start;
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -398,6 +409,31 @@ struct cfs_rq {
 	 */
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
+
+#ifdef CONFIG_SMP
+	/*
+	 * the part of load.weight contributed by tasks
+	 */
+	unsigned long task_weight;
+
+	/*
+	 * h_load = weight * f(tg)
+	 *
+	 * Where f(tg) is the recursive weight fraction assigned to
+	 * this group.
+	 */
+	unsigned long h_load;
+
+	/*
+	 * this cpu's part of tg->shares
+	 */
+	unsigned long shares;
+
+	/*
+	 * load.weight at the time we set shares
+	 */
+	unsigned long rq_weight;
+#endif
 #endif
 };
 
@@ -449,6 +485,9 @@ struct root_domain {
 	 */
 	cpumask_t rto_mask;
 	atomic_t rto_count;
+#ifdef CONFIG_SMP
+	struct cpupri cpupri;
+#endif
 };
 
 /*
@@ -523,6 +562,9 @@ struct rq {
 	int push_cpu;
 	/* cpu of this runqueue: */
 	int cpu;
+	int online;
+
+	unsigned long avg_load_per_task;
 
 	struct task_struct *migration_thread;
 	struct list_head migration_queue;
@@ -604,6 +646,24 @@ static inline void update_rq_clock(struct rq *rq)
 # define const_debug static const
 #endif
 
+/**
+ * runqueue_is_locked
+ *
+ * Returns true if the current cpu runqueue is locked.
+ * This interface allows printk to be called with the runqueue lock
+ * held and know whether or not it is OK to wake up the klogd.
+ */
+int runqueue_is_locked(void)
+{
+	int cpu = get_cpu();
+	struct rq *rq = cpu_rq(cpu);
+	int ret;
+
+	ret = spin_is_locked(&rq->lock);
+	put_cpu();
+	return ret;
+}
+
 /*
  * Debugging: various feature bits
  */
@@ -746,6 +806,12 @@ late_initcall(sched_init_debug);
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
 /*
+ * ratelimit for updating the group shares.
+ * default: 0.5ms
+ */
+const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+
+/*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
@@ -772,82 +838,6 @@ static inline u64 global_rt_runtime(void)
 	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-unsigned long long time_sync_thresh = 100000;
-
-static DEFINE_PER_CPU(unsigned long long, time_offset);
-static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
-
-/*
- * Global lock which we take every now and then to synchronize
- * the CPUs time. This method is not warp-safe, but it's good
- * enough to synchronize slowly diverging time sources and thus
- * it's good enough for tracing:
- */
-static DEFINE_SPINLOCK(time_sync_lock);
-static unsigned long long prev_global_time;
-
-static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
-{
-	/*
-	 * We want this inlined, to not get tracer function calls
-	 * in this critical section:
-	 */
-	spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
-	__raw_spin_lock(&time_sync_lock.raw_lock);
-
-	if (time < prev_global_time) {
-		per_cpu(time_offset, cpu) += prev_global_time - time;
-		time = prev_global_time;
-	} else {
-		prev_global_time = time;
-	}
-
-	__raw_spin_unlock(&time_sync_lock.raw_lock);
-	spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
-
-	return time;
-}
-
-static unsigned long long __cpu_clock(int cpu)
-{
-	unsigned long long now;
-
-	/*
-	 * Only call sched_clock() if the scheduler has already been
-	 * initialized (some code might call cpu_clock() very early):
-	 */
-	if (unlikely(!scheduler_running))
-		return 0;
-
-	now = sched_clock_cpu(cpu);
-
-	return now;
-}
-
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-unsigned long long cpu_clock(int cpu)
-{
-	unsigned long long prev_cpu_time, time, delta_time;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	prev_cpu_time = per_cpu(prev_cpu_time, cpu);
-	time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
-	delta_time = time-prev_cpu_time;
-
-	if (unlikely(delta_time > time_sync_thresh)) {
-		time = __sync_cpu_clock(time, cpu);
-		per_cpu(prev_cpu_time, cpu) = time;
-	}
-	local_irq_restore(flags);
-
-	return time;
-}
-EXPORT_SYMBOL_GPL(cpu_clock);
-
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next) do { } while (0)
 #endif
@@ -1124,6 +1114,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+#ifdef CONFIG_SMP
 static void hotplug_hrtick_disable(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -1179,6 +1170,7 @@ static void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
+#endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
 {
@@ -1308,15 +1300,15 @@ void wake_up_idle_cpu(int cpu)
 	if (!tsk_is_polling(rq->idle))
 		smp_send_reschedule(cpu);
 }
-#endif
+#endif /* CONFIG_NO_HZ */
 
-#else
+#else /* !CONFIG_SMP */
 static void __resched_task(struct task_struct *p, int tif_bit)
 {
 	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_thread_flag(p, tif_bit);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #if BITS_PER_LONG == 32
 # define WMULT_CONST (~0UL)
@@ -1331,14 +1323,22 @@ static void __resched_task(struct task_struct *p, int tif_bit)
  */
 #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
+/*
+ * delta *= weight / lw
+ */
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 		struct load_weight *lw)
 {
 	u64 tmp;
 
-	if (!lw->inv_weight)
-		lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
+	if (!lw->inv_weight) {
+		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+			lw->inv_weight = 1;
+		else
+			lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
+				/ (lw->weight+1);
+	}
 
 	tmp = (u64)delta_exec * weight;
 	/*
@@ -1353,12 +1353,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
 
-static inline unsigned long
-calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
-{
-	return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
-}
-
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
@@ -1469,17 +1463,211 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 #ifdef CONFIG_SMP
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long cpu_avg_load_per_task(int cpu);
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-#else /* CONFIG_SMP */
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->nr_running)
+		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+	return rq->avg_load_per_task;
+}
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+
+typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ */
+static void
+walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
 {
+	struct task_group *parent, *child;
+
+	rcu_read_lock();
+	parent = &root_task_group;
+down:
+	(*down)(parent, cpu, sd);
+	list_for_each_entry_rcu(child, &parent->children, siblings) {
+		parent = child;
+		goto down;
+
+up:
+		continue;
+	}
+	(*up)(parent, cpu, sd);
+
+	child = parent;
+	parent = parent->parent;
+	if (parent)
+		goto up;
+	rcu_read_unlock();
 }
+
+static void __set_se_shares(struct sched_entity *se, unsigned long shares);
+
+/*
+ * Calculate and set the cpu's group shares.
+ */
+static void
+__update_group_shares_cpu(struct task_group *tg, int cpu,
+			unsigned long sd_shares, unsigned long sd_rq_weight)
+{
+	int boost = 0;
+	unsigned long shares;
+	unsigned long rq_weight;
+
+	if (!tg->se[cpu])
+		return;
+
+	rq_weight = tg->cfs_rq[cpu]->load.weight;
+
+	/*
+	 * If there are currently no tasks on the cpu pretend there is one of
+	 * average load so that when a new task gets to run here it will not
+	 * get delayed by group starvation.
+	 */
+	if (!rq_weight) {
+		boost = 1;
+		rq_weight = NICE_0_LOAD;
+	}
+
+	if (unlikely(rq_weight > sd_rq_weight))
+		rq_weight = sd_rq_weight;
+
+	/*
+	 *           \Sum shares * rq_weight
+	 * shares =  -----------------------
+	 *               \Sum rq_weight
+	 *
+	 */
+	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+
+	/*
+	 * record the actual number of shares, not the boosted amount.
+	 */
+	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+	tg->cfs_rq[cpu]->rq_weight = rq_weight;
+
+	if (shares < MIN_SHARES)
+		shares = MIN_SHARES;
+	else if (shares > MAX_SHARES)
+		shares = MAX_SHARES;
+
+	__set_se_shares(tg->se[cpu], shares);
+}
+
+/*
+ * Re-compute the task group their per cpu shares over the given domain.
+ * This needs to be done in a bottom-up fashion because the rq weight of a
+ * parent group depends on the shares of its child groups.
+ */
+static void
+tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+{
+	unsigned long rq_weight = 0;
+	unsigned long shares = 0;
+	int i;
+
+	for_each_cpu_mask(i, sd->span) {
+		rq_weight += tg->cfs_rq[i]->load.weight;
+		shares += tg->cfs_rq[i]->shares;
+	}
+
+	if ((!shares && rq_weight) || shares > tg->shares)
+		shares = tg->shares;
+
+	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
+		shares = tg->shares;
+
+	if (!rq_weight)
+		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
+
+	for_each_cpu_mask(i, sd->span) {
+		struct rq *rq = cpu_rq(i);
+		unsigned long flags;
+
+		spin_lock_irqsave(&rq->lock, flags);
+		__update_group_shares_cpu(tg, i, shares, rq_weight);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
+}
+
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static void
+tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+{
+	unsigned long load;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->cfs_rq[cpu]->shares;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+}
+
+static void
+tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
+{
+}
+
+static void update_shares(struct sched_domain *sd)
+{
+	u64 now = cpu_clock(raw_smp_processor_id());
+	s64 elapsed = now - sd->last_update;
+
+	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
+		sd->last_update = now;
+		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+	}
+}
+
+static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
+{
+	spin_unlock(&rq->lock);
+	update_shares(sd);
+	spin_lock(&rq->lock);
+}
+
+static void update_h_load(int cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+}
+
+#else
+
+static inline void update_shares(struct sched_domain *sd)
+{
+}
+
+static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
+{
+}
+
 #endif
 
-#endif /* CONFIG_SMP */
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+{
+#ifdef CONFIG_SMP
+	cfs_rq->shares = shares;
+#endif
+}
+#endif
 
 #include "sched_stats.h"
 #include "sched_idletask.c"
@@ -1490,27 +1678,17 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 #endif
 
 #define sched_class_highest (&rt_sched_class)
+#define for_each_class(class) \
+   for (class = sched_class_highest; class; class = class->next)
 
-static inline void inc_load(struct rq *rq, const struct task_struct *p)
-{
-	update_load_add(&rq->load, p->se.load.weight);
-}
-
-static inline void dec_load(struct rq *rq, const struct task_struct *p)
-{
-	update_load_sub(&rq->load, p->se.load.weight);
-}
-
-static void inc_nr_running(struct task_struct *p, struct rq *rq)
+static void inc_nr_running(struct rq *rq)
 {
 	rq->nr_running++;
-	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct task_struct *p, struct rq *rq)
+static void dec_nr_running(struct rq *rq)
 {
 	rq->nr_running--;
-	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -1534,6 +1712,12 @@ static void set_load_weight(struct task_struct *p)
 	p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff >> 3;
+}
+
 static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	sched_info_queued(p);
@@ -1543,6 +1727,13 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
+	if (sleep && p->se.last_wakeup) {
+		update_avg(&p->se.avg_overlap,
+			   p->se.sum_exec_runtime - p->se.last_wakeup);
+		p->se.last_wakeup = 0;
+	}
+
+	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
 }
@@ -1602,7 +1793,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, wakeup);
-	inc_nr_running(p, rq);
+	inc_nr_running(rq);
 }
 
 /*
@@ -1614,7 +1805,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, sleep);
-	dec_nr_running(p, rq);
+	dec_nr_running(rq);
 }
 
 /**
@@ -1626,12 +1817,6 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
-/* Used instead of source_load when we know the type == 0 */
-unsigned long weighted_cpuload(const int cpu)
-{
-	return cpu_rq(cpu)->load.weight;
-}
-
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
 	set_task_rq(p, cpu);
@@ -1660,6 +1845,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 
 #ifdef CONFIG_SMP
 
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+	return cpu_rq(cpu)->load.weight;
+}
+
 /*
  * Is this task likely cache-hot:
  */
@@ -1870,7 +2061,7 @@ static unsigned long source_load(int cpu, int type)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long total = weighted_cpuload(cpu);
 
-	if (type == 0)
+	if (type == 0 || !sched_feat(LB_BIAS))
 		return total;
 
 	return min(rq->cpu_load[type-1], total);
@@ -1885,25 +2076,13 @@ static unsigned long target_load(int cpu, int type)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long total = weighted_cpuload(cpu);
 
-	if (type == 0)
+	if (type == 0 || !sched_feat(LB_BIAS))
 		return total;
 
 	return max(rq->cpu_load[type-1], total);
 }
 
 /*
- * Return the average load per task on the cpu's run queue
- */
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
-	unsigned long n = rq->nr_running;
-
-	return n ? total / n : SCHED_LOAD_SCALE;
-}
-
-/*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
  */
@@ -2009,6 +2188,9 @@ static int sched_balance_self(int cpu, int flag)
 			sd = tmp;
 	}
 
+	if (sd)
+		update_shares(sd);
+
 	while (sd) {
 		cpumask_t span, tmpmask;
 		struct sched_group *group;
@@ -2075,6 +2257,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	if (!sched_feat(SYNC_WAKEUPS))
 		sync = 0;
 
+#ifdef CONFIG_SMP
+	if (sched_feat(LB_WAKEUP_UPDATE)) {
+		struct sched_domain *sd;
+
+		this_cpu = raw_smp_processor_id();
+		cpu = task_cpu(p);
+
+		for_each_domain(this_cpu, sd) {
+			if (cpu_isset(cpu, sd->span)) {
+				update_shares(sd);
+				break;
+			}
+		}
+	}
+#endif
+
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
@@ -2121,7 +2319,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 			}
 		}
 	}
-#endif
+#endif /* CONFIG_SCHEDSTATS */
 
 out_activate:
 #endif /* CONFIG_SMP */
@@ -2139,6 +2337,9 @@ out_activate:
 	success = 1;
 
 out_running:
+	trace_mark(kernel_sched_wakeup,
+		"pid %d state %ld ## rq %p task %p rq->curr %p",
+		p->pid, p->state, rq, p, rq->curr);
 	check_preempt_curr(rq, p);
 
 	p->state = TASK_RUNNING;
@@ -2147,6 +2348,8 @@ out_running:
 		p->sched_class->task_wake_up(rq, p);
 #endif
 out:
+	current->se.last_wakeup = current->se.sum_exec_runtime;
+
 	task_rq_unlock(rq, &flags);
 
 	return success;
@@ -2267,8 +2470,11 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * management (if any):
 		 */
 		p->sched_class->task_new(rq, p);
-		inc_nr_running(p, rq);
+		inc_nr_running(rq);
 	}
+	trace_mark(kernel_sched_wakeup_new,
+		"pid %d state %ld ## rq %p task %p rq->curr %p",
+		p->pid, p->state, rq, p, rq->curr);
 	check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -2321,7 +2527,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 		notifier->ops->sched_out(notifier, next);
 }
 
-#else
+#else /* !CONFIG_PREEMPT_NOTIFIERS */
 
 static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
@@ -2333,7 +2539,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 {
 }
 
-#endif
+#endif /* CONFIG_PREEMPT_NOTIFIERS */
 
 /**
  * prepare_task_switch - prepare to switch tasks
@@ -2441,6 +2647,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	struct mm_struct *mm, *oldmm;
 
 	prepare_task_switch(rq, prev, next);
+	trace_mark(kernel_sched_schedule,
+		"prev_pid %d next_pid %d prev_state %ld "
+		"## rq %p prev %p next %p",
+		prev->pid, next->pid, prev->state,
+		rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -2775,7 +2986,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct rq_iterator *iterator)
 {
-	int loops = 0, pulled = 0, pinned = 0, skip_for_load;
+	int loops = 0, pulled = 0, pinned = 0;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
@@ -2791,14 +3002,8 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 next:
 	if (!p || loops++ > sysctl_sched_nr_migrate)
 		goto out;
-	/*
-	 * To help distribute high priority tasks across CPUs we don't
-	 * skip a task if it will be the highest priority task (i.e. smallest
-	 * prio value) on its new queue regardless of its load weight
-	 */
-	skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
-							 SCHED_LOAD_SCALE_FUZZ;
-	if ((skip_for_load && p->prio >= *this_best_prio) ||
+
+	if ((p->se.load.weight >> 1) > rem_load_move ||
 	    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
 		p = iterator->next(iterator->arg);
 		goto next;
@@ -2853,6 +3058,10 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 				max_load_move - total_load_moved,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
+
+		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
+			break;
+
 	} while (class && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
@@ -2929,6 +3138,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	max_load = this_load = total_load = total_pwr = 0;
 	busiest_load_per_task = busiest_nr_running = 0;
 	this_load_per_task = this_nr_running = 0;
+
 	if (idle == CPU_NOT_IDLE)
 		load_idx = sd->busy_idx;
 	else if (idle == CPU_NEWLY_IDLE)
@@ -2943,6 +3153,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		int __group_imb = 0;
 		unsigned int balance_cpu = -1, first_idle_cpu = 0;
 		unsigned long sum_nr_running, sum_weighted_load;
+		unsigned long sum_avg_load_per_task;
+		unsigned long avg_load_per_task;
 
 		local_group = cpu_isset(this_cpu, group->cpumask);
 
@@ -2951,6 +3163,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
+		sum_avg_load_per_task = avg_load_per_task = 0;
+
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
@@ -2984,6 +3198,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 			avg_load += load;
 			sum_nr_running += rq->nr_running;
 			sum_weighted_load += weighted_cpuload(i);
+
+			sum_avg_load_per_task += cpu_avg_load_per_task(i);
 		}
 
 		/*
@@ -3005,7 +3221,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		avg_load = sg_div_cpu_power(group,
 				avg_load * SCHED_LOAD_SCALE);
 
-		if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
+
+		/*
+		 * Consider the group unbalanced when the imbalance is larger
+		 * than the average weight of two tasks.
+		 *
+		 * APZ: with cgroup the avg task weight can vary wildly and
+		 * might not be a suitable number - should we keep a
+		 * normalized nr_running number somewhere that negates
+		 * the hierarchy?
+		 */
+		avg_load_per_task = sg_div_cpu_power(group,
+				sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
 			__group_imb = 1;
 
 		group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
@@ -3146,9 +3375,9 @@ small_imbalance:
 			if (busiest_load_per_task > this_load_per_task)
 				imbn = 1;
 		} else
-			this_load_per_task = SCHED_LOAD_SCALE;
+			this_load_per_task = cpu_avg_load_per_task(this_cpu);
 
-		if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >=
+		if (max_load - this_load + 2*busiest_load_per_task >=
 				busiest_load_per_task * imbn) {
 			*imbalance = busiest_load_per_task;
 			return busiest;
@@ -3274,6 +3503,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
+	update_shares(sd);
 	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
 				   cpus, balance);
 
@@ -3376,8 +3606,9 @@ redo:
 
 	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		return -1;
-	return ld_moved;
+		ld_moved = -1;
+
+	goto out;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[idle]);
@@ -3392,8 +3623,13 @@ out_one_pinned:
 
 	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		return -1;
-	return 0;
+		ld_moved = -1;
+	else
+		ld_moved = 0;
+out:
+	if (ld_moved)
+		update_shares(sd);
+	return ld_moved;
 }
 
 /*
@@ -3428,6 +3664,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 
 	schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]);
 redo:
+	update_shares_locked(this_rq, sd);
 	group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE,
 				   &sd_idle, cpus, NULL);
 	if (!group) {
@@ -3471,6 +3708,7 @@ redo:
 	} else
 		sd->nr_balance_failed = 0;
 
+	update_shares_locked(this_rq, sd);
 	return ld_moved;
 
 out_balanced:
@@ -3662,6 +3900,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
+	int need_serialize;
 	cpumask_t tmp;
 
 	for_each_domain(cpu, sd) {
@@ -3679,8 +3918,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (interval > HZ*NR_CPUS/10)
 			interval = HZ*NR_CPUS/10;
 
+		need_serialize = sd->flags & SD_SERIALIZE;
 
-		if (sd->flags & SD_SERIALIZE) {
+		if (need_serialize) {
 			if (!spin_trylock(&balancing))
 				goto out;
 		}
@@ -3696,7 +3936,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 			}
 			sd->last_balance = jiffies;
 		}
-		if (sd->flags & SD_SERIALIZE)
+		if (need_serialize)
 			spin_unlock(&balancing);
 out:
 		if (time_after(next_balance, sd->last_balance + interval)) {
@@ -4011,26 +4251,44 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+				defined(CONFIG_PREEMPT_TRACER))
+
+static inline unsigned long get_parent_ip(unsigned long addr)
+{
+	if (in_lock_functions(addr)) {
+		addr = CALLER_ADDR2;
+		if (in_lock_functions(addr))
+			addr = CALLER_ADDR3;
+	}
+	return addr;
+}
 
 void __kprobes add_preempt_count(int val)
 {
+#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
 	 */
 	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
 		return;
+#endif
 	preempt_count() += val;
+#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Spinlock count overflowing soon?
 	 */
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
+#endif
+	if (preempt_count() == val)
+		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 EXPORT_SYMBOL(add_preempt_count);
 
 void __kprobes sub_preempt_count(int val)
 {
+#ifdef CONFIG_DEBUG_PREEMPT
 	/*
 	 * Underflow?
 	 */
@@ -4042,7 +4300,10 @@ void __kprobes sub_preempt_count(int val)
 	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
 			!(preempt_count() & PREEMPT_MASK)))
 		return;
+#endif
 
+	if (preempt_count() == val)
+		trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 	preempt_count() -= val;
 }
 EXPORT_SYMBOL(sub_preempt_count);
@@ -4060,6 +4321,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 		prev->comm, prev->pid, preempt_count());
 
 	debug_show_held_locks(prev);
+	print_modules();
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
 
@@ -4133,7 +4395,7 @@ asmlinkage void __sched schedule(void)
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
-	int cpu;
+	int cpu, hrtick = sched_feat(HRTICK);
 
 need_resched:
 	preempt_disable();
@@ -4148,7 +4410,8 @@ need_resched_nonpreemptible:
 
 	schedule_debug(prev);
 
-	hrtick_clear(rq);
+	if (hrtick)
+		hrtick_clear(rq);
 
 	/*
 	 * Do the rq-clock update outside the rq lock:
@@ -4159,12 +4422,10 @@ need_resched_nonpreemptible:
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				signal_pending(prev))) {
+		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
-		} else {
+		else
 			deactivate_task(rq, prev, 1);
-		}
 		switch_count = &prev->nvcsw;
 	}
 
@@ -4196,7 +4457,8 @@ need_resched_nonpreemptible:
 	} else
 		spin_unlock_irq(&rq->lock);
 
-	hrtick_set(rq);
+	if (hrtick)
+		hrtick_set(rq);
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
@@ -4390,22 +4652,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 			     signal_pending(current)) ||
 			    (state == TASK_KILLABLE &&
 			     fatal_signal_pending(current))) {
-				__remove_wait_queue(&x->wait, &wait);
-				return -ERESTARTSYS;
+				timeout = -ERESTARTSYS;
+				break;
 			}
 			__set_current_state(state);
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
-			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				return timeout;
-			}
-		} while (!x->done);
+		} while (!x->done && timeout);
 		__remove_wait_queue(&x->wait, &wait);
+		if (!x->done)
+			return timeout;
 	}
 	x->done--;
-	return timeout;
+	return timeout ?: 1;
 }
 
 static long __sched
@@ -4580,10 +4840,8 @@ void set_user_nice(struct task_struct *p, long nice)
 		goto out_unlock;
 	}
 	on_rq = p->se.on_rq;
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, p, 0);
-		dec_load(rq, p);
-	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -4593,7 +4851,6 @@ void set_user_nice(struct task_struct *p, long nice)
 
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
-		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4738,16 +4995,8 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	set_load_weight(p);
 }
 
-/**
- * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
- * @p: the task in question.
- * @policy: new policy.
- * @param: structure containing the new RT priority.
- *
- * NOTE that the task may be already dead.
- */
-int sched_setscheduler(struct task_struct *p, int policy,
-		       struct sched_param *param)
+static int __sched_setscheduler(struct task_struct *p, int policy,
+				struct sched_param *param, bool user)
 {
 	int retval, oldprio, oldpolicy = -1, on_rq, running;
 	unsigned long flags;
@@ -4779,7 +5028,7 @@ recheck:
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
 	 */
-	if (!capable(CAP_SYS_NICE)) {
+	if (user && !capable(CAP_SYS_NICE)) {
 		if (rt_policy(policy)) {
 			unsigned long rlim_rtprio;
 
@@ -4815,7 +5064,8 @@ recheck:
 	 * Do not allow realtime tasks into groups that have no runtime
 	 * assigned.
 	 */
-	if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+	if (user
+	    && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
 		return -EPERM;
 #endif
 
@@ -4864,8 +5114,39 @@ recheck:
 
 	return 0;
 }
+
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * NOTE that the task may be already dead.
+ */
+int sched_setscheduler(struct task_struct *p, int policy,
+		       struct sched_param *param)
+{
+	return __sched_setscheduler(p, policy, param, true);
+}
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
+/**
+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Just like sched_setscheduler, only don't bother checking if the
+ * current context has permission. For example, this is needed in
+ * stop_machine(): we create temporary high priority worker threads,
+ * but our caller might not have that capability.
+ */
+int sched_setscheduler_nocheck(struct task_struct *p, int policy,
+			       struct sched_param *param)
+{
+	return __sched_setscheduler(p, policy, param, false);
+}
+
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
@@ -5064,24 +5345,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 	return sched_setaffinity(pid, &new_mask);
 }
 
-/*
- * Represents all cpu's present in the system
- * In systems capable of hotplug, this map could dynamically grow
- * as new cpu's are detected in the system via any platform specific
- * method, such as ACPI for e.g.
- */
-
-cpumask_t cpu_present_map __read_mostly;
-EXPORT_SYMBOL(cpu_present_map);
-
-#ifndef CONFIG_SMP
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_online_map);
-
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
-EXPORT_SYMBOL(cpu_possible_map);
-#endif
-
 long sched_getaffinity(pid_t pid, cpumask_t *mask)
 {
 	struct task_struct *p;
@@ -5378,7 +5641,7 @@ out_unlock:
 	return retval;
 }
 
-static const char stat_nam[] = "RSDTtZX";
+static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
 
 void sched_show_task(struct task_struct *p)
 {
@@ -5565,6 +5828,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 		goto out;
 	}
 
+	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
+		     !cpus_equal(p->cpus_allowed, *new_mask))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 	else {
@@ -5616,10 +5885,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
 	if (task_cpu(p) != src_cpu)
-		goto out;
+		goto done;
 	/* Affinity changed (again). */
 	if (!cpu_isset(dest_cpu, p->cpus_allowed))
-		goto out;
+		goto fail;
 
 	on_rq = p->se.on_rq;
 	if (on_rq)
@@ -5630,8 +5899,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 		activate_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p);
 	}
+done:
 	ret = 1;
-out:
+fail:
 	double_rq_unlock(rq_src, rq_dest);
 	return ret;
 }
@@ -5881,6 +6151,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 		next = pick_next_task(rq, rq->curr);
 		if (!next)
 			break;
+		next->sched_class->put_prev_task(rq, next);
 		migrate_dead(dead_cpu, next);
 
 	}
@@ -6052,6 +6323,36 @@ static void unregister_sched_domain_sysctl(void)
 }
 #endif
 
+static void set_rq_online(struct rq *rq)
+{
+	if (!rq->online) {
+		const struct sched_class *class;
+
+		cpu_set(rq->cpu, rq->rd->online);
+		rq->online = 1;
+
+		for_each_class(class) {
+			if (class->rq_online)
+				class->rq_online(rq);
+		}
+	}
+}
+
+static void set_rq_offline(struct rq *rq)
+{
+	if (rq->online) {
+		const struct sched_class *class;
+
+		for_each_class(class) {
+			if (class->rq_offline)
+				class->rq_offline(rq);
+		}
+
+		cpu_clear(rq->cpu, rq->rd->online);
+		rq->online = 0;
+	}
+}
+
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
@@ -6089,7 +6390,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpu_isset(cpu, rq->rd->span));
-			cpu_set(cpu, rq->rd->online);
+
+			set_rq_online(rq);
 		}
 		spin_unlock_irqrestore(&rq->lock, flags);
 		break;
@@ -6150,7 +6452,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpu_isset(cpu, rq->rd->span));
-			cpu_clear(cpu, rq->rd->online);
+			set_rq_offline(rq);
 		}
 		spin_unlock_irqrestore(&rq->lock, flags);
 		break;
@@ -6184,6 +6486,28 @@ void __init migration_init(void)
 
 #ifdef CONFIG_SCHED_DEBUG
 
+static inline const char *sd_level_to_string(enum sched_domain_level lvl)
+{
+	switch (lvl) {
+	case SD_LV_NONE:
+			return "NONE";
+	case SD_LV_SIBLING:
+			return "SIBLING";
+	case SD_LV_MC:
+			return "MC";
+	case SD_LV_CPU:
+			return "CPU";
+	case SD_LV_NODE:
+			return "NODE";
+	case SD_LV_ALLNODES:
+			return "ALLNODES";
+	case SD_LV_MAX:
+			return "MAX";
+
+	}
+	return "MAX";
+}
+
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  cpumask_t *groupmask)
 {
@@ -6203,7 +6527,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %s\n", str);
+	printk(KERN_CONT "span %s level %s\n",
+		str, sd_level_to_string(sd->level));
 
 	if (!cpu_isset(cpu, sd->span)) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -6287,9 +6612,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 	}
 	kfree(groupmask);
 }
-#else
+#else /* !CONFIG_SCHED_DEBUG */
 # define sched_domain_debug(sd, cpu) do { } while (0)
-#endif
+#endif /* CONFIG_SCHED_DEBUG */
 
 static int sd_degenerate(struct sched_domain *sd)
 {
@@ -6349,20 +6674,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6349static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6674static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6350{ 6675{
6351 unsigned long flags; 6676 unsigned long flags;
6352 const struct sched_class *class;
6353 6677
6354 spin_lock_irqsave(&rq->lock, flags); 6678 spin_lock_irqsave(&rq->lock, flags);
6355 6679
6356 if (rq->rd) { 6680 if (rq->rd) {
6357 struct root_domain *old_rd = rq->rd; 6681 struct root_domain *old_rd = rq->rd;
6358 6682
6359 for (class = sched_class_highest; class; class = class->next) { 6683 if (cpu_isset(rq->cpu, old_rd->online))
6360 if (class->leave_domain) 6684 set_rq_offline(rq);
6361 class->leave_domain(rq);
6362 }
6363 6685
6364 cpu_clear(rq->cpu, old_rd->span); 6686 cpu_clear(rq->cpu, old_rd->span);
6365 cpu_clear(rq->cpu, old_rd->online);
6366 6687
6367 if (atomic_dec_and_test(&old_rd->refcount)) 6688 if (atomic_dec_and_test(&old_rd->refcount))
6368 kfree(old_rd); 6689 kfree(old_rd);
@@ -6373,12 +6694,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6373 6694
6374 cpu_set(rq->cpu, rd->span); 6695 cpu_set(rq->cpu, rd->span);
6375 if (cpu_isset(rq->cpu, cpu_online_map)) 6696 if (cpu_isset(rq->cpu, cpu_online_map))
6376 cpu_set(rq->cpu, rd->online); 6697 set_rq_online(rq);
6377
6378 for (class = sched_class_highest; class; class = class->next) {
6379 if (class->join_domain)
6380 class->join_domain(rq);
6381 }
6382 6698
6383 spin_unlock_irqrestore(&rq->lock, flags); 6699 spin_unlock_irqrestore(&rq->lock, flags);
6384} 6700}
@@ -6389,6 +6705,8 @@ static void init_rootdomain(struct root_domain *rd)
6389 6705
6390 cpus_clear(rd->span); 6706 cpus_clear(rd->span);
6391 cpus_clear(rd->online); 6707 cpus_clear(rd->online);
6708
6709 cpupri_init(&rd->cpupri);
6392} 6710}
6393 6711
6394static void init_defrootdomain(void) 6712static void init_defrootdomain(void)
@@ -6531,9 +6849,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
6531 6849
6532 min_val = INT_MAX; 6850 min_val = INT_MAX;
6533 6851
6534 for (i = 0; i < MAX_NUMNODES; i++) { 6852 for (i = 0; i < nr_node_ids; i++) {
6535 /* Start at @node */ 6853 /* Start at @node */
6536 n = (node + i) % MAX_NUMNODES; 6854 n = (node + i) % nr_node_ids;
6537 6855
6538 if (!nr_cpus_node(n)) 6856 if (!nr_cpus_node(n))
6539 continue; 6857 continue;
@@ -6583,7 +6901,7 @@ static void sched_domain_node_span(int node, cpumask_t *span)
6583 cpus_or(*span, *span, *nodemask); 6901 cpus_or(*span, *span, *nodemask);
6584 } 6902 }
6585} 6903}
6586#endif 6904#endif /* CONFIG_NUMA */
6587 6905
6588int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 6906int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6589 6907
@@ -6602,7 +6920,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6602 *sg = &per_cpu(sched_group_cpus, cpu); 6920 *sg = &per_cpu(sched_group_cpus, cpu);
6603 return cpu; 6921 return cpu;
6604} 6922}
6605#endif 6923#endif /* CONFIG_SCHED_SMT */
6606 6924
6607/* 6925/*
6608 * multi-core sched-domains: 6926 * multi-core sched-domains:
@@ -6610,7 +6928,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
6610#ifdef CONFIG_SCHED_MC 6928#ifdef CONFIG_SCHED_MC
6611static DEFINE_PER_CPU(struct sched_domain, core_domains); 6929static DEFINE_PER_CPU(struct sched_domain, core_domains);
6612static DEFINE_PER_CPU(struct sched_group, sched_group_core); 6930static DEFINE_PER_CPU(struct sched_group, sched_group_core);
6613#endif 6931#endif /* CONFIG_SCHED_MC */
6614 6932
6615#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 6933#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6616static int 6934static int
@@ -6712,7 +7030,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
6712 sg = sg->next; 7030 sg = sg->next;
6713 } while (sg != group_head); 7031 } while (sg != group_head);
6714} 7032}
6715#endif 7033#endif /* CONFIG_NUMA */
6716 7034
6717#ifdef CONFIG_NUMA 7035#ifdef CONFIG_NUMA
6718/* Free memory allocated for various sched_group structures */ 7036/* Free memory allocated for various sched_group structures */
@@ -6727,7 +7045,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
6727 if (!sched_group_nodes) 7045 if (!sched_group_nodes)
6728 continue; 7046 continue;
6729 7047
6730 for (i = 0; i < MAX_NUMNODES; i++) { 7048 for (i = 0; i < nr_node_ids; i++) {
6731 struct sched_group *oldsg, *sg = sched_group_nodes[i]; 7049 struct sched_group *oldsg, *sg = sched_group_nodes[i];
6732 7050
6733 *nodemask = node_to_cpumask(i); 7051 *nodemask = node_to_cpumask(i);
@@ -6749,11 +7067,11 @@ next_sg:
6749 sched_group_nodes_bycpu[cpu] = NULL; 7067 sched_group_nodes_bycpu[cpu] = NULL;
6750 } 7068 }
6751} 7069}
6752#else 7070#else /* !CONFIG_NUMA */
6753static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7071static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
6754{ 7072{
6755} 7073}
6756#endif 7074#endif /* CONFIG_NUMA */
6757 7075
6758/* 7076/*
6759 * Initialize sched groups cpu_power. 7077 * Initialize sched groups cpu_power.
@@ -6871,7 +7189,12 @@ static int default_relax_domain_level = -1;
6871 7189
6872static int __init setup_relax_domain_level(char *str) 7190static int __init setup_relax_domain_level(char *str)
6873{ 7191{
6874 default_relax_domain_level = simple_strtoul(str, NULL, 0); 7192 unsigned long val;
7193
7194 val = simple_strtoul(str, NULL, 0);
7195 if (val < SD_LV_MAX)
7196 default_relax_domain_level = val;
7197
6875 return 1; 7198 return 1;
6876} 7199}
6877__setup("relax_domain_level=", setup_relax_domain_level); 7200__setup("relax_domain_level=", setup_relax_domain_level);
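
The hunk above makes setup_relax_domain_level() range-check the relax_domain_level= boot option before committing it, so out-of-range values are now silently ignored instead of being stored. A minimal user-space sketch of the same parse-then-validate idiom (strtoul stands in for simple_strtoul, and LEVEL_MAX is a made-up bound in place of SD_LV_MAX):

        #include <stdio.h>
        #include <stdlib.h>

        #define LEVEL_MAX 10    /* stand-in for SD_LV_MAX; the real bound comes from enum sched_domain_level */

        static int default_level = -1;  /* -1 means "no override", as in the scheduler */

        /* Parse the option string, but only commit values inside the valid range. */
        static int setup_level(const char *str)
        {
                unsigned long val = strtoul(str, NULL, 0);

                if (val < LEVEL_MAX)
                        default_level = (int)val;

                return 1;       /* mirrors the __setup() handler convention of returning 1 */
        }

        int main(void)
        {
                setup_level("2");       /* accepted: default_level becomes 2 */
                setup_level("99");      /* rejected: default_level stays 2 */
                printf("default_level = %d\n", default_level);
                return 0;
        }

The point of the change is visible in the second call: a bogus value no longer overwrites the default, it is simply dropped.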
@@ -6915,7 +7238,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
6915 /* 7238 /*
6916 * Allocate the per-node list of sched groups 7239 * Allocate the per-node list of sched groups
6917 */ 7240 */
6918 sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), 7241 sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
6919 GFP_KERNEL); 7242 GFP_KERNEL);
6920 if (!sched_group_nodes) { 7243 if (!sched_group_nodes) {
6921 printk(KERN_WARNING "Can not alloc sched group node list\n"); 7244 printk(KERN_WARNING "Can not alloc sched group node list\n");
@@ -7054,7 +7377,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7054#endif 7377#endif
7055 7378
7056 /* Set up physical groups */ 7379 /* Set up physical groups */
7057 for (i = 0; i < MAX_NUMNODES; i++) { 7380 for (i = 0; i < nr_node_ids; i++) {
7058 SCHED_CPUMASK_VAR(nodemask, allmasks); 7381 SCHED_CPUMASK_VAR(nodemask, allmasks);
7059 SCHED_CPUMASK_VAR(send_covered, allmasks); 7382 SCHED_CPUMASK_VAR(send_covered, allmasks);
7060 7383
@@ -7078,7 +7401,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7078 send_covered, tmpmask); 7401 send_covered, tmpmask);
7079 } 7402 }
7080 7403
7081 for (i = 0; i < MAX_NUMNODES; i++) { 7404 for (i = 0; i < nr_node_ids; i++) {
7082 /* Set up node groups */ 7405 /* Set up node groups */
7083 struct sched_group *sg, *prev; 7406 struct sched_group *sg, *prev;
7084 SCHED_CPUMASK_VAR(nodemask, allmasks); 7407 SCHED_CPUMASK_VAR(nodemask, allmasks);
@@ -7117,9 +7440,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7117 cpus_or(*covered, *covered, *nodemask); 7440 cpus_or(*covered, *covered, *nodemask);
7118 prev = sg; 7441 prev = sg;
7119 7442
7120 for (j = 0; j < MAX_NUMNODES; j++) { 7443 for (j = 0; j < nr_node_ids; j++) {
7121 SCHED_CPUMASK_VAR(notcovered, allmasks); 7444 SCHED_CPUMASK_VAR(notcovered, allmasks);
7122 int n = (i + j) % MAX_NUMNODES; 7445 int n = (i + j) % nr_node_ids;
7123 node_to_cpumask_ptr(pnodemask, n); 7446 node_to_cpumask_ptr(pnodemask, n);
7124 7447
7125 cpus_complement(*notcovered, *covered); 7448 cpus_complement(*notcovered, *covered);
@@ -7172,7 +7495,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7172 } 7495 }
7173 7496
7174#ifdef CONFIG_NUMA 7497#ifdef CONFIG_NUMA
7175 for (i = 0; i < MAX_NUMNODES; i++) 7498 for (i = 0; i < nr_node_ids; i++)
7176 init_numa_sched_groups_power(sched_group_nodes[i]); 7499 init_numa_sched_groups_power(sched_group_nodes[i]);
7177 7500
7178 if (sd_allnodes) { 7501 if (sd_allnodes) {
@@ -7230,6 +7553,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
7230} 7553}
7231 7554
7232/* 7555/*
7556 * Free current domain masks.
7557 * Called after all cpus are attached to NULL domain.
7558 */
7559static void free_sched_domains(void)
7560{
7561 ndoms_cur = 0;
7562 if (doms_cur != &fallback_doms)
7563 kfree(doms_cur);
7564 doms_cur = &fallback_doms;
7565}
7566
7567/*
7233 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7568 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
7234 * For now this just excludes isolated cpus, but could be used to 7569 * For now this just excludes isolated cpus, but could be used to
7235 * exclude other special cases in the future. 7570 * exclude other special cases in the future.
@@ -7376,6 +7711,7 @@ int arch_reinit_sched_domains(void)
7376 get_online_cpus(); 7711 get_online_cpus();
7377 mutex_lock(&sched_domains_mutex); 7712 mutex_lock(&sched_domains_mutex);
7378 detach_destroy_domains(&cpu_online_map); 7713 detach_destroy_domains(&cpu_online_map);
7714 free_sched_domains();
7379 err = arch_init_sched_domains(&cpu_online_map); 7715 err = arch_init_sched_domains(&cpu_online_map);
7380 mutex_unlock(&sched_domains_mutex); 7716 mutex_unlock(&sched_domains_mutex);
7381 put_online_cpus(); 7717 put_online_cpus();
@@ -7444,7 +7780,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7444#endif 7780#endif
7445 return err; 7781 return err;
7446} 7782}
7447#endif 7783#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7448 7784
7449/* 7785/*
7450 * Force a reinitialization of the sched domains hierarchy. The domains 7786 * Force a reinitialization of the sched domains hierarchy. The domains
@@ -7455,20 +7791,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7455static int update_sched_domains(struct notifier_block *nfb, 7791static int update_sched_domains(struct notifier_block *nfb,
7456 unsigned long action, void *hcpu) 7792 unsigned long action, void *hcpu)
7457{ 7793{
7794 int cpu = (int)(long)hcpu;
7795
7458 switch (action) { 7796 switch (action) {
7459 case CPU_UP_PREPARE:
7460 case CPU_UP_PREPARE_FROZEN:
7461 case CPU_DOWN_PREPARE: 7797 case CPU_DOWN_PREPARE:
7462 case CPU_DOWN_PREPARE_FROZEN: 7798 case CPU_DOWN_PREPARE_FROZEN:
7799 disable_runtime(cpu_rq(cpu));
7800 /* fall-through */
7801 case CPU_UP_PREPARE:
7802 case CPU_UP_PREPARE_FROZEN:
7463 detach_destroy_domains(&cpu_online_map); 7803 detach_destroy_domains(&cpu_online_map);
7804 free_sched_domains();
7464 return NOTIFY_OK; 7805 return NOTIFY_OK;
7465 7806
7466 case CPU_UP_CANCELED: 7807
7467 case CPU_UP_CANCELED_FROZEN:
7468 case CPU_DOWN_FAILED: 7808 case CPU_DOWN_FAILED:
7469 case CPU_DOWN_FAILED_FROZEN: 7809 case CPU_DOWN_FAILED_FROZEN:
7470 case CPU_ONLINE: 7810 case CPU_ONLINE:
7471 case CPU_ONLINE_FROZEN: 7811 case CPU_ONLINE_FROZEN:
7812 enable_runtime(cpu_rq(cpu));
7813 /* fall-through */
7814 case CPU_UP_CANCELED:
7815 case CPU_UP_CANCELED_FROZEN:
7472 case CPU_DEAD: 7816 case CPU_DEAD:
7473 case CPU_DEAD_FROZEN: 7817 case CPU_DEAD_FROZEN:
7474 /* 7818 /*
@@ -7479,8 +7823,16 @@ static int update_sched_domains(struct notifier_block *nfb,
7479 return NOTIFY_DONE; 7823 return NOTIFY_DONE;
7480 } 7824 }
7481 7825
7826#ifndef CONFIG_CPUSETS
7827 /*
7828 * Create default domain partitioning if cpusets are disabled.
7829 * Otherwise we let cpusets rebuild the domains based on the
7830 * current setup.
7831 */
7832
7482 /* The hotplug lock is already held by cpu_up/cpu_down */ 7833 /* The hotplug lock is already held by cpu_up/cpu_down */
7483 arch_init_sched_domains(&cpu_online_map); 7834 arch_init_sched_domains(&cpu_online_map);
7835#endif
7484 7836
7485 return NOTIFY_OK; 7837 return NOTIFY_OK;
7486} 7838}
@@ -7620,7 +7972,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
7620 else 7972 else
7621 rt_se->rt_rq = parent->my_q; 7973 rt_se->rt_rq = parent->my_q;
7622 7974
7623 rt_se->rt_rq = &rq->rt;
7624 rt_se->my_q = rt_rq; 7975 rt_se->my_q = rt_rq;
7625 rt_se->parent = parent; 7976 rt_se->parent = parent;
7626 INIT_LIST_HEAD(&rt_se->run_list); 7977 INIT_LIST_HEAD(&rt_se->run_list);
@@ -7661,8 +8012,8 @@ void __init sched_init(void)
7661 8012
7662 root_task_group.cfs_rq = (struct cfs_rq **)ptr; 8013 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
7663 ptr += nr_cpu_ids * sizeof(void **); 8014 ptr += nr_cpu_ids * sizeof(void **);
7664#endif 8015#endif /* CONFIG_USER_SCHED */
7665#endif 8016#endif /* CONFIG_FAIR_GROUP_SCHED */
7666#ifdef CONFIG_RT_GROUP_SCHED 8017#ifdef CONFIG_RT_GROUP_SCHED
7667 init_task_group.rt_se = (struct sched_rt_entity **)ptr; 8018 init_task_group.rt_se = (struct sched_rt_entity **)ptr;
7668 ptr += nr_cpu_ids * sizeof(void **); 8019 ptr += nr_cpu_ids * sizeof(void **);
@@ -7676,8 +8027,8 @@ void __init sched_init(void)
7676 8027
7677 root_task_group.rt_rq = (struct rt_rq **)ptr; 8028 root_task_group.rt_rq = (struct rt_rq **)ptr;
7678 ptr += nr_cpu_ids * sizeof(void **); 8029 ptr += nr_cpu_ids * sizeof(void **);
7679#endif 8030#endif /* CONFIG_USER_SCHED */
7680#endif 8031#endif /* CONFIG_RT_GROUP_SCHED */
7681 } 8032 }
7682 8033
7683#ifdef CONFIG_SMP 8034#ifdef CONFIG_SMP
@@ -7693,8 +8044,8 @@ void __init sched_init(void)
7693#ifdef CONFIG_USER_SCHED 8044#ifdef CONFIG_USER_SCHED
7694 init_rt_bandwidth(&root_task_group.rt_bandwidth, 8045 init_rt_bandwidth(&root_task_group.rt_bandwidth,
7695 global_rt_period(), RUNTIME_INF); 8046 global_rt_period(), RUNTIME_INF);
7696#endif 8047#endif /* CONFIG_USER_SCHED */
7697#endif 8048#endif /* CONFIG_RT_GROUP_SCHED */
7698 8049
7699#ifdef CONFIG_GROUP_SCHED 8050#ifdef CONFIG_GROUP_SCHED
7700 list_add(&init_task_group.list, &task_groups); 8051 list_add(&init_task_group.list, &task_groups);
@@ -7704,8 +8055,8 @@ void __init sched_init(void)
7704 INIT_LIST_HEAD(&root_task_group.children); 8055 INIT_LIST_HEAD(&root_task_group.children);
7705 init_task_group.parent = &root_task_group; 8056 init_task_group.parent = &root_task_group;
7706 list_add(&init_task_group.siblings, &root_task_group.children); 8057 list_add(&init_task_group.siblings, &root_task_group.children);
7707#endif 8058#endif /* CONFIG_USER_SCHED */
7708#endif 8059#endif /* CONFIG_GROUP_SCHED */
7709 8060
7710 for_each_possible_cpu(i) { 8061 for_each_possible_cpu(i) {
7711 struct rq *rq; 8062 struct rq *rq;
@@ -7785,6 +8136,7 @@ void __init sched_init(void)
7785 rq->next_balance = jiffies; 8136 rq->next_balance = jiffies;
7786 rq->push_cpu = 0; 8137 rq->push_cpu = 0;
7787 rq->cpu = i; 8138 rq->cpu = i;
8139 rq->online = 0;
7788 rq->migration_thread = NULL; 8140 rq->migration_thread = NULL;
7789 INIT_LIST_HEAD(&rq->migration_queue); 8141 INIT_LIST_HEAD(&rq->migration_queue);
7790 rq_attach_root(rq, &def_root_domain); 8142 rq_attach_root(rq, &def_root_domain);
@@ -7800,7 +8152,7 @@ void __init sched_init(void)
7800#endif 8152#endif
7801 8153
7802#ifdef CONFIG_SMP 8154#ifdef CONFIG_SMP
7803 open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); 8155 open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
7804#endif 8156#endif
7805 8157
7806#ifdef CONFIG_RT_MUTEXES 8158#ifdef CONFIG_RT_MUTEXES
@@ -8024,7 +8376,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8024{ 8376{
8025 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); 8377 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
8026} 8378}
8027#else 8379#else /* !CONFIG_FAIR_GROUP_SCHED */
8028static inline void free_fair_sched_group(struct task_group *tg) 8380static inline void free_fair_sched_group(struct task_group *tg)
8029{ 8381{
8030} 8382}
@@ -8042,7 +8394,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu)
8042static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) 8394static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8043{ 8395{
8044} 8396}
8045#endif 8397#endif /* CONFIG_FAIR_GROUP_SCHED */
8046 8398
8047#ifdef CONFIG_RT_GROUP_SCHED 8399#ifdef CONFIG_RT_GROUP_SCHED
8048static void free_rt_sched_group(struct task_group *tg) 8400static void free_rt_sched_group(struct task_group *tg)
@@ -8113,7 +8465,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8113{ 8465{
8114 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); 8466 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
8115} 8467}
8116#else 8468#else /* !CONFIG_RT_GROUP_SCHED */
8117static inline void free_rt_sched_group(struct task_group *tg) 8469static inline void free_rt_sched_group(struct task_group *tg)
8118{ 8470{
8119} 8471}
@@ -8131,7 +8483,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu)
8131static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) 8483static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8132{ 8484{
8133} 8485}
8134#endif 8486#endif /* CONFIG_RT_GROUP_SCHED */
8135 8487
8136#ifdef CONFIG_GROUP_SCHED 8488#ifdef CONFIG_GROUP_SCHED
8137static void free_sched_group(struct task_group *tg) 8489static void free_sched_group(struct task_group *tg)
@@ -8242,17 +8594,14 @@ void sched_move_task(struct task_struct *tsk)
8242 8594
8243 task_rq_unlock(rq, &flags); 8595 task_rq_unlock(rq, &flags);
8244} 8596}
8245#endif 8597#endif /* CONFIG_GROUP_SCHED */
8246 8598
8247#ifdef CONFIG_FAIR_GROUP_SCHED 8599#ifdef CONFIG_FAIR_GROUP_SCHED
8248static void set_se_shares(struct sched_entity *se, unsigned long shares) 8600static void __set_se_shares(struct sched_entity *se, unsigned long shares)
8249{ 8601{
8250 struct cfs_rq *cfs_rq = se->cfs_rq; 8602 struct cfs_rq *cfs_rq = se->cfs_rq;
8251 struct rq *rq = cfs_rq->rq;
8252 int on_rq; 8603 int on_rq;
8253 8604
8254 spin_lock_irq(&rq->lock);
8255
8256 on_rq = se->on_rq; 8605 on_rq = se->on_rq;
8257 if (on_rq) 8606 if (on_rq)
8258 dequeue_entity(cfs_rq, se, 0); 8607 dequeue_entity(cfs_rq, se, 0);
@@ -8262,8 +8611,17 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
8262 8611
8263 if (on_rq) 8612 if (on_rq)
8264 enqueue_entity(cfs_rq, se, 0); 8613 enqueue_entity(cfs_rq, se, 0);
8614}
8265 8615
8266 spin_unlock_irq(&rq->lock); 8616static void set_se_shares(struct sched_entity *se, unsigned long shares)
8617{
8618 struct cfs_rq *cfs_rq = se->cfs_rq;
8619 struct rq *rq = cfs_rq->rq;
8620 unsigned long flags;
8621
8622 spin_lock_irqsave(&rq->lock, flags);
8623 __set_se_shares(se, shares);
8624 spin_unlock_irqrestore(&rq->lock, flags);
8267} 8625}
8268 8626
8269static DEFINE_MUTEX(shares_mutex); 8627static DEFINE_MUTEX(shares_mutex);
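
The split above is the usual lock-wrapper idiom: a double-underscore worker that assumes the run-queue lock is already held, plus a thin wrapper that takes rq->lock with irqsave for callers that do not. A generic, self-contained sketch of the same shape (pthread mutexes stand in for the kernel spinlock; the counter names are made up):

        #include <pthread.h>
        #include <stdio.h>

        struct counter {
                pthread_mutex_t lock;
                unsigned long value;
        };

        /* Bare worker: the caller must already hold c->lock.
         * The leading "__" mirrors the kernel's naming convention for that. */
        static void __counter_set(struct counter *c, unsigned long value)
        {
                c->value = value;
        }

        /* Locked wrapper for callers that do not hold the lock themselves. */
        static void counter_set(struct counter *c, unsigned long value)
        {
                pthread_mutex_lock(&c->lock);
                __counter_set(c, value);
                pthread_mutex_unlock(&c->lock);
        }

        int main(void)
        {
                struct counter c = { PTHREAD_MUTEX_INITIALIZER, 0 };

                counter_set(&c, 1024);
                printf("value = %lu\n", c.value);
                return 0;
        }

The benefit, as in the hunk above, is that code paths which already hold the lock can call the bare variant without re-taking it, while everyone else keeps the safe wrapper.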
@@ -8302,8 +8660,13 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8302 * w/o tripping rebalance_share or load_balance_fair. 8660 * w/o tripping rebalance_share or load_balance_fair.
8303 */ 8661 */
8304 tg->shares = shares; 8662 tg->shares = shares;
8305 for_each_possible_cpu(i) 8663 for_each_possible_cpu(i) {
8664 /*
8665 * force a rebalance
8666 */
8667 cfs_rq_set_shares(tg->cfs_rq[i], 0);
8306 set_se_shares(tg->se[i], shares); 8668 set_se_shares(tg->se[i], shares);
8669 }
8307 8670
8308 /* 8671 /*
8309 * Enable load balance activity on this group, by inserting it back on 8672 * Enable load balance activity on this group, by inserting it back on
@@ -8366,7 +8729,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
8366 } 8729 }
8367 rcu_read_unlock(); 8730 rcu_read_unlock();
8368 8731
8369 return total + to_ratio(period, runtime) < 8732 return total + to_ratio(period, runtime) <=
8370 to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), 8733 to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
8371 parent->rt_bandwidth.rt_runtime); 8734 parent->rt_bandwidth.rt_runtime);
8372} 8735}
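
The comparison above is relaxed from '<' to '<=' so that child groups may use exactly their parent's bandwidth rather than strictly less. A standalone sketch of the arithmetic with made-up numbers, assuming to_ratio() is simply runtime/period in 16-bit fixed point (an assumption about the helper, not a copy of it):

        #include <stdio.h>
        #include <stdint.h>

        /* Assumed shape of to_ratio(): runtime/period as a 16.16 fixed-point fraction. */
        static uint64_t to_ratio(uint64_t period_ns, uint64_t runtime_ns)
        {
                return (runtime_ns << 16) / period_ns;
        }

        int main(void)
        {
                /* Hypothetical parent group: 1s period, 500ms runtime. */
                uint64_t parent = to_ratio(1000000000ULL, 500000000ULL);        /* 32768 */

                /* Two hypothetical children: 1s period, 250ms runtime each. */
                uint64_t total = to_ratio(1000000000ULL, 250000000ULL) +
                                 to_ratio(1000000000ULL, 250000000ULL);         /* 32768 */

                printf("strict  <  : %s\n", total <  parent ? "admitted" : "rejected");
                printf("relaxed <= : %s\n", total <= parent ? "admitted" : "rejected");
                return 0;
        }

With the old strict comparison, a configuration that exactly fills the parent's budget is refused; with '<=' it is accepted. The reworked sched_rt_global_constraints() in the next hunk applies the same check to the root group's own period and runtime instead of the previous hard-coded (NULL, 1, 0) arguments.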
@@ -8469,6 +8832,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
8469 rt_period = (u64)rt_period_us * NSEC_PER_USEC; 8832 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
8470 rt_runtime = tg->rt_bandwidth.rt_runtime; 8833 rt_runtime = tg->rt_bandwidth.rt_runtime;
8471 8834
8835 if (rt_period == 0)
8836 return -EINVAL;
8837
8472 return tg_set_bandwidth(tg, rt_period, rt_runtime); 8838 return tg_set_bandwidth(tg, rt_period, rt_runtime);
8473} 8839}
8474 8840
@@ -8483,16 +8849,21 @@ long sched_group_rt_period(struct task_group *tg)
8483 8849
8484static int sched_rt_global_constraints(void) 8850static int sched_rt_global_constraints(void)
8485{ 8851{
8852 struct task_group *tg = &root_task_group;
8853 u64 rt_runtime, rt_period;
8486 int ret = 0; 8854 int ret = 0;
8487 8855
8856 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
8857 rt_runtime = tg->rt_bandwidth.rt_runtime;
8858
8488 mutex_lock(&rt_constraints_mutex); 8859 mutex_lock(&rt_constraints_mutex);
8489 if (!__rt_schedulable(NULL, 1, 0)) 8860 if (!__rt_schedulable(tg, rt_period, rt_runtime))
8490 ret = -EINVAL; 8861 ret = -EINVAL;
8491 mutex_unlock(&rt_constraints_mutex); 8862 mutex_unlock(&rt_constraints_mutex);
8492 8863
8493 return ret; 8864 return ret;
8494} 8865}
8495#else 8866#else /* !CONFIG_RT_GROUP_SCHED */
8496static int sched_rt_global_constraints(void) 8867static int sched_rt_global_constraints(void)
8497{ 8868{
8498 unsigned long flags; 8869 unsigned long flags;
@@ -8510,7 +8881,7 @@ static int sched_rt_global_constraints(void)
8510 8881
8511 return 0; 8882 return 0;
8512} 8883}
8513#endif 8884#endif /* CONFIG_RT_GROUP_SCHED */
8514 8885
8515int sched_rt_handler(struct ctl_table *table, int write, 8886int sched_rt_handler(struct ctl_table *table, int write,
8516 struct file *filp, void __user *buffer, size_t *lenp, 8887 struct file *filp, void __user *buffer, size_t *lenp,
@@ -8618,7 +8989,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
8618 8989
8619 return (u64) tg->shares; 8990 return (u64) tg->shares;
8620} 8991}
8621#endif 8992#endif /* CONFIG_FAIR_GROUP_SCHED */
8622 8993
8623#ifdef CONFIG_RT_GROUP_SCHED 8994#ifdef CONFIG_RT_GROUP_SCHED
8624static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, 8995static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
@@ -8642,7 +9013,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
8642{ 9013{
8643 return sched_group_rt_period(cgroup_tg(cgrp)); 9014 return sched_group_rt_period(cgroup_tg(cgrp));
8644} 9015}
8645#endif 9016#endif /* CONFIG_RT_GROUP_SCHED */
8646 9017
8647static struct cftype cpu_files[] = { 9018static struct cftype cpu_files[] = {
8648#ifdef CONFIG_FAIR_GROUP_SCHED 9019#ifdef CONFIG_FAIR_GROUP_SCHED