Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  358
1 files changed, 83 insertions, 275 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index f28f19e65b59..b02e4fc25645 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -174,41 +174,6 @@ struct task_group {
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
         struct cfs_rq **cfs_rq;
-
-        /*
-         * shares assigned to a task group governs how much of cpu bandwidth
-         * is allocated to the group. The more shares a group has, the more is
-         * the cpu bandwidth allocated to it.
-         *
-         * For ex, lets say that there are three task groups, A, B and C which
-         * have been assigned shares 1000, 2000 and 3000 respectively. Then,
-         * cpu bandwidth allocated by the scheduler to task groups A, B and C
-         * should be:
-         *
-         *      Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
-         *      Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
-         *      Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
-         *
-         * The weight assigned to a task group's schedulable entities on every
-         * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
-         * group's shares. For ex: lets say that task group A has been
-         * assigned shares of 1000 and there are two CPUs in a system. Then,
-         *
-         *      tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
-         *
-         * Note: It's not necessary that each of a task's group schedulable
-         *       entity have the same weight on all CPUs. If the group
-         *       has 2 of its tasks on CPU0 and 1 task on CPU1, then a
-         *       better distribution of weight could be:
-         *
-         *      tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
-         *      tg_A->se[1]->load.weight = 1/2 * 2000 = 667
-         *
-         * rebalance_shares() is responsible for distributing the shares of a
-         * task groups like this among the group's schedulable entities across
-         * cpus.
-         *
-         */
         unsigned long shares;
 #endif
 
@@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock);
 static DEFINE_MUTEX(doms_cur_mutex);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_SMP
-/* kernel thread that runs rebalance_shares() periodically */
-static struct task_struct *lb_monitor_task;
-static int load_balance_monitor(void *unused);
-#endif
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares);
-
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
 #else
 # define INIT_TASK_GROUP_LOAD NICE_0_LOAD
 #endif
 
-#define MIN_GROUP_SHARES 2
-
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
 
@@ -668,6 +623,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_rt_period = 1000000;
 
+static __read_mostly int scheduler_running;
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -689,14 +646,16 @@ unsigned long long cpu_clock(int cpu)
         unsigned long flags;
         struct rq *rq;
 
-        local_irq_save(flags);
-        rq = cpu_rq(cpu);
         /*
          * Only call sched_clock() if the scheduler has already been
          * initialized (some code might call cpu_clock() very early):
          */
-        if (rq->idle)
-                update_rq_clock(rq);
+        if (unlikely(!scheduler_running))
+                return 0;
+
+        local_irq_save(flags);
+        rq = cpu_rq(cpu);
+        update_rq_clock(rq);
         now = rq->clock;
         local_irq_restore(flags);
 
@@ -1241,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #endif
 
-static inline void inc_cpu_load(struct rq *rq, unsigned long load)
-{
-        update_load_add(&rq->load, load);
-}
-
-static inline void dec_cpu_load(struct rq *rq, unsigned long load)
-{
-        update_load_sub(&rq->load, load);
-}
-
 #ifdef CONFIG_SMP
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
@@ -1268,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 
 #define sched_class_highest (&rt_sched_class)
 
-static void inc_nr_running(struct rq *rq)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
+{
+        update_load_add(&rq->load, p->se.load.weight);
+}
+
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
+{
+        update_load_sub(&rq->load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
         rq->nr_running++;
+        inc_load(rq, p);
 }
 
-static void dec_nr_running(struct rq *rq)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
         rq->nr_running--;
+        dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -1367,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
                 rq->nr_uninterruptible--;
 
         enqueue_task(rq, p, wakeup);
-        inc_nr_running(rq);
+        inc_nr_running(p, rq);
 }
 
 /*
@@ -1379,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
                 rq->nr_uninterruptible++;
 
         dequeue_task(rq, p, sleep);
-        dec_nr_running(rq);
+        dec_nr_running(p, rq);
 }
 
 /**
@@ -1831,6 +1792,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
         long old_state;
         struct rq *rq;
 
+        smp_wmb();
         rq = task_rq_lock(p, &flags);
         old_state = p->state;
         if (!(old_state & state))
@@ -2018,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
                  * management (if any):
                  */
                 p->sched_class->task_new(rq, p);
-                inc_nr_running(rq);
+                inc_nr_running(p, rq);
         }
         check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
@@ -3766,7 +3728,7 @@ void scheduler_tick(void)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
 
-void add_preempt_count(int val)
+void __kprobes add_preempt_count(int val)
 {
         /*
          * Underflow?
@@ -3782,7 +3744,7 @@ void add_preempt_count(int val)
 }
 EXPORT_SYMBOL(add_preempt_count);
 
-void sub_preempt_count(int val)
+void __kprobes sub_preempt_count(int val)
 {
         /*
          * Underflow?
@@ -3884,7 +3846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 asmlinkage void __sched schedule(void)
 {
         struct task_struct *prev, *next;
-        long *switch_count;
+        unsigned long *switch_count;
         struct rq *rq;
         int cpu;
 
@@ -4357,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice)
                 goto out_unlock;
         }
         on_rq = p->se.on_rq;
-        if (on_rq)
+        if (on_rq) {
                 dequeue_task(rq, p, 0);
+                dec_load(rq, p);
+        }
 
         p->static_prio = NICE_TO_PRIO(nice);
         set_load_weight(p);
@@ -4368,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice)
 
         if (on_rq) {
                 enqueue_task(rq, p, 0);
+                inc_load(rq, p);
                 /*
                  * If the task increased its priority or is running and
                  * lowered its priority, then reschedule its CPU:
@@ -4457,7 +4422,7 @@ int task_nice(const struct task_struct *p)
 {
         return TASK_NICE(p);
 }
-EXPORT_SYMBOL_GPL(task_nice);
+EXPORT_SYMBOL(task_nice);
 
 /**
  * idle_cpu - is a given cpu idle currently?
@@ -5135,7 +5100,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
         time_slice = 0;
         if (p->policy == SCHED_RR) {
                 time_slice = DEF_TIMESLICE;
-        } else {
+        } else if (p->policy != SCHED_FIFO) {
                 struct sched_entity *se = &p->se;
                 unsigned long flags;
                 struct rq *rq;
@@ -5848,6 +5813,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                 /* Must be high prio: stop_machine expects to yield to it. */
                 rq = task_rq_lock(p, &flags);
                 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
+
+                /* Update our root-domain */
+                if (rq->rd) {
+                        BUG_ON(!cpu_isset(cpu, rq->rd->span));
+                        cpu_set(cpu, rq->rd->online);
+                }
+
                 task_rq_unlock(rq, &flags);
                 cpu_rq(cpu)->migration_thread = p;
                 break;
@@ -5856,15 +5828,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
         case CPU_ONLINE_FROZEN:
                 /* Strictly unnecessary, as first user will wake it. */
                 wake_up_process(cpu_rq(cpu)->migration_thread);
-
-                /* Update our root-domain */
-                rq = cpu_rq(cpu);
-                spin_lock_irqsave(&rq->lock, flags);
-                if (rq->rd) {
-                        BUG_ON(!cpu_isset(cpu, rq->rd->span));
-                        cpu_set(cpu, rq->rd->online);
-                }
-                spin_unlock_irqrestore(&rq->lock, flags);
                 break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -6140,8 +6103,6 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
         rq->rd = rd;
 
         cpu_set(rq->cpu, rd->span);
-        if (cpu_isset(rq->cpu, cpu_online_map))
-                cpu_set(rq->cpu, rd->online);
 
         for (class = sched_class_highest; class; class = class->next) {
                 if (class->join_domain)
@@ -7082,21 +7043,6 @@ void __init sched_init_smp(void)
         if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                 BUG();
         sched_init_granularity();
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-        if (nr_cpu_ids == 1)
-                return;
-
-        lb_monitor_task = kthread_create(load_balance_monitor, NULL,
-                                         "group_balance");
-        if (!IS_ERR(lb_monitor_task)) {
-                lb_monitor_task->flags |= PF_NOFREEZE;
-                wake_up_process(lb_monitor_task);
-        } else {
-                printk(KERN_ERR "Could not create load balance monitor thread"
-                        "(error = %ld) \n", PTR_ERR(lb_monitor_task));
-        }
-#endif
 }
 #else
 void __init sched_init_smp(void)
@@ -7283,6 +7229,8 @@ void __init sched_init(void)
          * During early bootup we pretend to be a normal task:
          */
         current->sched_class = &fair_sched_class;
+
+        scheduler_running = 1;
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7417,157 +7365,6 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_GROUP_SCHED
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-/*
- * distribute shares of all task groups among their schedulable entities,
- * to reflect load distribution across cpus.
- */
-static int rebalance_shares(struct sched_domain *sd, int this_cpu)
-{
-        struct cfs_rq *cfs_rq;
-        struct rq *rq = cpu_rq(this_cpu);
-        cpumask_t sdspan = sd->span;
-        int balanced = 1;
-
-        /* Walk thr' all the task groups that we have */
-        for_each_leaf_cfs_rq(rq, cfs_rq) {
-                int i;
-                unsigned long total_load = 0, total_shares;
-                struct task_group *tg = cfs_rq->tg;
-
-                /* Gather total task load of this group across cpus */
-                for_each_cpu_mask(i, sdspan)
-                        total_load += tg->cfs_rq[i]->load.weight;
-
-                /* Nothing to do if this group has no load */
-                if (!total_load)
-                        continue;
-
-                /*
-                 * tg->shares represents the number of cpu shares the task group
-                 * is eligible to hold on a single cpu. On N cpus, it is
-                 * eligible to hold (N * tg->shares) number of cpu shares.
-                 */
-                total_shares = tg->shares * cpus_weight(sdspan);
-
-                /*
-                 * redistribute total_shares across cpus as per the task load
-                 * distribution.
-                 */
-                for_each_cpu_mask(i, sdspan) {
-                        unsigned long local_load, local_shares;
-
-                        local_load = tg->cfs_rq[i]->load.weight;
-                        local_shares = (local_load * total_shares) / total_load;
-                        if (!local_shares)
-                                local_shares = MIN_GROUP_SHARES;
-                        if (local_shares == tg->se[i]->load.weight)
-                                continue;
-
-                        spin_lock_irq(&cpu_rq(i)->lock);
-                        set_se_shares(tg->se[i], local_shares);
-                        spin_unlock_irq(&cpu_rq(i)->lock);
-                        balanced = 0;
-                }
-        }
-
-        return balanced;
-}
-
-/*
- * How frequently should we rebalance_shares() across cpus?
- *
- * The more frequently we rebalance shares, the more accurate is the fairness
- * of cpu bandwidth distribution between task groups. However higher frequency
- * also implies increased scheduling overhead.
- *
- * sysctl_sched_min_bal_int_shares represents the minimum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * sysctl_sched_max_bal_int_shares represents the maximum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * These settings allows for the appropriate trade-off between accuracy of
- * fairness and the associated overhead.
- *
- */
-
-/* default: 8ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
-
-/* default: 128ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
-
-/* kernel thread that runs rebalance_shares() periodically */
-static int load_balance_monitor(void *unused)
-{
-        unsigned int timeout = sysctl_sched_min_bal_int_shares;
-        struct sched_param schedparm;
-        int ret;
-
-        /*
-         * We don't want this thread's execution to be limited by the shares
-         * assigned to default group (init_task_group). Hence make it run
-         * as a SCHED_RR RT task at the lowest priority.
-         */
-        schedparm.sched_priority = 1;
-        ret = sched_setscheduler(current, SCHED_RR, &schedparm);
-        if (ret)
-                printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
-                        " monitor thread (error = %d) \n", ret);
-
-        while (!kthread_should_stop()) {
-                int i, cpu, balanced = 1;
-
-                /* Prevent cpus going down or coming up */
-                get_online_cpus();
-                /* lockout changes to doms_cur[] array */
-                lock_doms_cur();
-                /*
-                 * Enter a rcu read-side critical section to safely walk rq->sd
-                 * chain on various cpus and to walk task group list
-                 * (rq->leaf_cfs_rq_list) in rebalance_shares().
-                 */
-                rcu_read_lock();
-
-                for (i = 0; i < ndoms_cur; i++) {
-                        cpumask_t cpumap = doms_cur[i];
-                        struct sched_domain *sd = NULL, *sd_prev = NULL;
-
-                        cpu = first_cpu(cpumap);
-
-                        /* Find the highest domain at which to balance shares */
-                        for_each_domain(cpu, sd) {
-                                if (!(sd->flags & SD_LOAD_BALANCE))
-                                        continue;
-                                sd_prev = sd;
-                        }
-
-                        sd = sd_prev;
-                        /* sd == NULL? No load balance reqd in this domain */
-                        if (!sd)
-                                continue;
-
-                        balanced &= rebalance_shares(sd, cpu);
-                }
-
-                rcu_read_unlock();
-
-                unlock_doms_cur();
-                put_online_cpus();
-
-                if (!balanced)
-                        timeout = sysctl_sched_min_bal_int_shares;
-                else if (timeout < sysctl_sched_max_bal_int_shares)
-                        timeout *= 2;
-
-                msleep_interruptible(timeout);
-        }
-
-        return 0;
-}
-#endif  /* CONFIG_SMP */
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void free_fair_sched_group(struct task_group *tg)
 {
@@ -7824,6 +7621,11 @@ void sched_move_task(struct task_struct *tsk)
 
         set_task_rq(tsk, task_cpu(tsk));
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        if (tsk->sched_class->moved_group)
+                tsk->sched_class->moved_group(tsk);
+#endif
+
         if (on_rq) {
                 if (unlikely(running))
                         tsk->sched_class->set_curr_task(rq);
@@ -7834,29 +7636,25 @@ void sched_move_task(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
         struct cfs_rq *cfs_rq = se->cfs_rq;
         struct rq *rq = cfs_rq->rq;
         int on_rq;
 
-        if (!shares)
-                shares = MIN_GROUP_SHARES;
+        spin_lock_irq(&rq->lock);
 
         on_rq = se->on_rq;
-        if (on_rq) {
+        if (on_rq)
                 dequeue_entity(cfs_rq, se, 0);
-                dec_cpu_load(rq, se->load.weight);
-        }
 
         se->load.weight = shares;
         se->load.inv_weight = div64_64((1ULL<<32), shares);
 
-        if (on_rq) {
+        if (on_rq)
                 enqueue_entity(cfs_rq, se, 0);
-                inc_cpu_load(rq, se->load.weight);
-        }
+
+        spin_unlock_irq(&rq->lock);
 }
 
 static DEFINE_MUTEX(shares_mutex);
@@ -7866,18 +7664,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
         int i;
         unsigned long flags;
 
+        /*
+         * A weight of 0 or 1 can cause arithmetics problems.
+         * (The default weight is 1024 - so there's no practical
+         *  limitation from this.)
+         */
+        if (shares < 2)
+                shares = 2;
+
         mutex_lock(&shares_mutex);
         if (tg->shares == shares)
                 goto done;
 
-        if (shares < MIN_GROUP_SHARES)
-                shares = MIN_GROUP_SHARES;
-
-        /*
-         * Prevent any load balance activity (rebalance_shares,
-         * load_balance_fair) from referring to this group first,
-         * by taking it off the rq->leaf_cfs_rq_list on each cpu.
-         */
         spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i)
                 unregister_fair_sched_group(tg, i);
@@ -7891,11 +7689,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * w/o tripping rebalance_share or load_balance_fair.
          */
         tg->shares = shares;
-        for_each_possible_cpu(i) {
-                spin_lock_irq(&cpu_rq(i)->lock);
+        for_each_possible_cpu(i)
                 set_se_shares(tg->se[i], shares);
-                spin_unlock_irq(&cpu_rq(i)->lock);
-        }
 
         /*
          * Enable load balance activity on this group, by inserting it back on
@@ -7927,9 +7722,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
         if (runtime == RUNTIME_INF)
                 return 1ULL << 16;
 
-        runtime *= (1ULL << 16);
-        div64_64(runtime, period);
-        return runtime;
+        return div64_64(runtime << 16, period);
 }
 
 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
@@ -7953,25 +7746,40 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
         return total + to_ratio(period, runtime) < global_ratio;
 }
 
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
+{
+        struct task_struct *g, *p;
+        do_each_thread(g, p) {
+                if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+                        return 1;
+        } while_each_thread(g, p);
+        return 0;
+}
+
 int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
         u64 rt_runtime, rt_period;
         int err = 0;
 
-        rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+        rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
         rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
         if (rt_runtime_us == -1)
-                rt_runtime = rt_period;
+                rt_runtime = RUNTIME_INF;
 
         mutex_lock(&rt_constraints_mutex);
+        read_lock(&tasklist_lock);
+        if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) {
+                err = -EBUSY;
+                goto unlock;
+        }
         if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
                 err = -EINVAL;
                 goto unlock;
         }
-        if (rt_runtime_us == -1)
-                rt_runtime = RUNTIME_INF;
         tg->rt_runtime = rt_runtime;
  unlock:
+        read_unlock(&tasklist_lock);
         mutex_unlock(&rt_constraints_mutex);
 
         return err;