Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  338
1 file changed, 75 insertions, 263 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index b387a8de26a5..1cb53fb1fe3d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -174,41 +174,6 @@ struct task_group {
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
-
-	/*
-	 * shares assigned to a task group governs how much of cpu bandwidth
-	 * is allocated to the group. The more shares a group has, the more is
-	 * the cpu bandwidth allocated to it.
-	 *
-	 * For ex, lets say that there are three task groups, A, B and C which
-	 * have been assigned shares 1000, 2000 and 3000 respectively. Then,
-	 * cpu bandwidth allocated by the scheduler to task groups A, B and C
-	 * should be:
-	 *
-	 * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
-	 * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
-	 * Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
-	 *
-	 * The weight assigned to a task group's schedulable entities on every
-	 * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
-	 * group's shares. For ex: lets say that task group A has been
-	 * assigned shares of 1000 and there are two CPUs in a system. Then,
-	 *
-	 *	tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
-	 *
-	 * Note: It's not necessary that each of a task's group schedulable
-	 *       entity have the same weight on all CPUs. If the group
-	 *       has 2 of its tasks on CPU0 and 1 task on CPU1, then a
-	 *       better distribution of weight could be:
-	 *
-	 *	tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
-	 *	tg_A->se[1]->load.weight = 1/2 * 2000 = 667
-	 *
-	 * rebalance_shares() is responsible for distributing the shares of a
-	 * task groups like this among the group's schedulable entities across
-	 * cpus.
-	 *
-	 */
 	unsigned long shares;
 #endif
 
@@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock);
 static DEFINE_MUTEX(doms_cur_mutex);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_SMP
-/* kernel thread that runs rebalance_shares() periodically */
-static struct task_struct *lb_monitor_task;
-static int load_balance_monitor(void *unused);
-#endif
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares);
-
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
 #else
 # define INIT_TASK_GROUP_LOAD NICE_0_LOAD
 #endif
 
-#define MIN_GROUP_SHARES	2
-
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
 
@@ -668,6 +623,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_rt_period = 1000000;
 
+static __read_mostly int scheduler_running;
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -689,14 +646,16 @@ unsigned long long cpu_clock(int cpu)
 	unsigned long flags;
 	struct rq *rq;
 
-	local_irq_save(flags);
-	rq = cpu_rq(cpu);
 	/*
 	 * Only call sched_clock() if the scheduler has already been
 	 * initialized (some code might call cpu_clock() very early):
 	 */
-	if (rq->idle)
-		update_rq_clock(rq);
+	if (unlikely(!scheduler_running))
+		return 0;
+
+	local_irq_save(flags);
+	rq = cpu_rq(cpu);
+	update_rq_clock(rq);
 	now = rq->clock;
 	local_irq_restore(flags);
 
@@ -1241,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #endif
 
-static inline void inc_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_add(&rq->load, load);
-}
-
-static inline void dec_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_sub(&rq->load, load);
-}
-
 #ifdef CONFIG_SMP
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
@@ -1268,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 
 #define sched_class_highest (&rt_sched_class)
 
-static void inc_nr_running(struct rq *rq)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_add(&rq->load, p->se.load.weight);
+}
+
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_sub(&rq->load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
+	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct rq *rq)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
+	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -1367,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, wakeup);
-	inc_nr_running(rq);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -1379,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, sleep);
-	dec_nr_running(rq);
+	dec_nr_running(p, rq);
 }
 
 /**
@@ -2019,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * management (if any):
 		 */
 		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
+		inc_nr_running(p, rq);
 	}
 	check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
@@ -3885,7 +3846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
-	long *switch_count;
+	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
@@ -4358,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice)
 		goto out_unlock;
 	}
 	on_rq = p->se.on_rq;
-	if (on_rq)
+	if (on_rq) {
 		dequeue_task(rq, p, 0);
+		dec_load(rq, p);
+	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -4369,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice)
 
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
+		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4458,7 +4422,7 @@ int task_nice(const struct task_struct *p)
 {
 	return TASK_NICE(p);
 }
-EXPORT_SYMBOL_GPL(task_nice);
+EXPORT_SYMBOL(task_nice);
 
 /**
  * idle_cpu - is a given cpu idle currently?
@@ -5136,7 +5100,7 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 	time_slice = 0;
 	if (p->policy == SCHED_RR) {
 		time_slice = DEF_TIMESLICE;
-	} else {
+	} else if (p->policy != SCHED_FIFO) {
 		struct sched_entity *se = &p->se;
 		unsigned long flags;
 		struct rq *rq;
@@ -5917,7 +5881,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		spin_unlock_irq(&rq->lock);
 		break;
 
-	case CPU_DOWN_PREPARE:
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
@@ -7083,21 +7048,6 @@ void __init sched_init_smp(void)
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
 	sched_init_granularity();
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (nr_cpu_ids == 1)
-		return;
-
-	lb_monitor_task = kthread_create(load_balance_monitor, NULL,
-					 "group_balance");
-	if (!IS_ERR(lb_monitor_task)) {
-		lb_monitor_task->flags |= PF_NOFREEZE;
-		wake_up_process(lb_monitor_task);
-	} else {
-		printk(KERN_ERR "Could not create load balance monitor thread"
-			"(error = %ld) \n", PTR_ERR(lb_monitor_task));
-	}
-#endif
 }
 #else
 void __init sched_init_smp(void)
@@ -7284,6 +7234,8 @@ void __init sched_init(void)
 	 * During early bootup we pretend to be a normal task:
 	 */
 	current->sched_class = &fair_sched_class;
+
+	scheduler_running = 1;
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7418,157 +7370,6 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_GROUP_SCHED
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-/*
- * distribute shares of all task groups among their schedulable entities,
- * to reflect load distribution across cpus.
- */
-static int rebalance_shares(struct sched_domain *sd, int this_cpu)
-{
-	struct cfs_rq *cfs_rq;
-	struct rq *rq = cpu_rq(this_cpu);
-	cpumask_t sdspan = sd->span;
-	int balanced = 1;
-
-	/* Walk thr' all the task groups that we have */
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		int i;
-		unsigned long total_load = 0, total_shares;
-		struct task_group *tg = cfs_rq->tg;
-
-		/* Gather total task load of this group across cpus */
-		for_each_cpu_mask(i, sdspan)
-			total_load += tg->cfs_rq[i]->load.weight;
-
-		/* Nothing to do if this group has no load */
-		if (!total_load)
-			continue;
-
-		/*
-		 * tg->shares represents the number of cpu shares the task group
-		 * is eligible to hold on a single cpu. On N cpus, it is
-		 * eligible to hold (N * tg->shares) number of cpu shares.
-		 */
-		total_shares = tg->shares * cpus_weight(sdspan);
-
-		/*
-		 * redistribute total_shares across cpus as per the task load
-		 * distribution.
-		 */
-		for_each_cpu_mask(i, sdspan) {
-			unsigned long local_load, local_shares;
-
-			local_load = tg->cfs_rq[i]->load.weight;
-			local_shares = (local_load * total_shares) / total_load;
-			if (!local_shares)
-				local_shares = MIN_GROUP_SHARES;
-			if (local_shares == tg->se[i]->load.weight)
-				continue;
-
-			spin_lock_irq(&cpu_rq(i)->lock);
-			set_se_shares(tg->se[i], local_shares);
-			spin_unlock_irq(&cpu_rq(i)->lock);
-			balanced = 0;
-		}
-	}
-
-	return balanced;
-}
-
-/*
- * How frequently should we rebalance_shares() across cpus?
- *
- * The more frequently we rebalance shares, the more accurate is the fairness
- * of cpu bandwidth distribution between task groups. However higher frequency
- * also implies increased scheduling overhead.
- *
- * sysctl_sched_min_bal_int_shares represents the minimum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * sysctl_sched_max_bal_int_shares represents the maximum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * These settings allows for the appropriate trade-off between accuracy of
- * fairness and the associated overhead.
- *
- */
-
-/* default: 8ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
-
-/* default: 128ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
-
-/* kernel thread that runs rebalance_shares() periodically */
-static int load_balance_monitor(void *unused)
-{
-	unsigned int timeout = sysctl_sched_min_bal_int_shares;
-	struct sched_param schedparm;
-	int ret;
-
-	/*
-	 * We don't want this thread's execution to be limited by the shares
-	 * assigned to default group (init_task_group). Hence make it run
-	 * as a SCHED_RR RT task at the lowest priority.
-	 */
-	schedparm.sched_priority = 1;
-	ret = sched_setscheduler(current, SCHED_RR, &schedparm);
-	if (ret)
-		printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
-				" monitor thread (error = %d) \n", ret);
-
-	while (!kthread_should_stop()) {
-		int i, cpu, balanced = 1;
-
-		/* Prevent cpus going down or coming up */
-		get_online_cpus();
-		/* lockout changes to doms_cur[] array */
-		lock_doms_cur();
-		/*
-		 * Enter a rcu read-side critical section to safely walk rq->sd
-		 * chain on various cpus and to walk task group list
-		 * (rq->leaf_cfs_rq_list) in rebalance_shares().
-		 */
-		rcu_read_lock();
-
-		for (i = 0; i < ndoms_cur; i++) {
-			cpumask_t cpumap = doms_cur[i];
-			struct sched_domain *sd = NULL, *sd_prev = NULL;
-
-			cpu = first_cpu(cpumap);
-
-			/* Find the highest domain at which to balance shares */
-			for_each_domain(cpu, sd) {
-				if (!(sd->flags & SD_LOAD_BALANCE))
-					continue;
-				sd_prev = sd;
-			}
-
-			sd = sd_prev;
-			/* sd == NULL? No load balance reqd in this domain */
-			if (!sd)
-				continue;
-
-			balanced &= rebalance_shares(sd, cpu);
-		}
-
-		rcu_read_unlock();
-
-		unlock_doms_cur();
-		put_online_cpus();
-
-		if (!balanced)
-			timeout = sysctl_sched_min_bal_int_shares;
-		else if (timeout < sysctl_sched_max_bal_int_shares)
-			timeout *= 2;
-
-		msleep_interruptible(timeout);
-	}
-
-	return 0;
-}
-#endif	/* CONFIG_SMP */
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void free_fair_sched_group(struct task_group *tg)
 {
@@ -7825,6 +7626,11 @@ void sched_move_task(struct task_struct *tsk)
 
 	set_task_rq(tsk, task_cpu(tsk));
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (tsk->sched_class->moved_group)
+		tsk->sched_class->moved_group(tsk);
+#endif
+
 	if (on_rq) {
 		if (unlikely(running))
 			tsk->sched_class->set_curr_task(rq);
@@ -7835,29 +7641,25 @@ void sched_move_task(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
 	struct cfs_rq *cfs_rq = se->cfs_rq;
 	struct rq *rq = cfs_rq->rq;
 	int on_rq;
 
-	if (!shares)
-		shares = MIN_GROUP_SHARES;
+	spin_lock_irq(&rq->lock);
 
 	on_rq = se->on_rq;
-	if (on_rq) {
+	if (on_rq)
 		dequeue_entity(cfs_rq, se, 0);
-		dec_cpu_load(rq, se->load.weight);
-	}
 
 	se->load.weight = shares;
 	se->load.inv_weight = div64_64((1ULL<<32), shares);
 
-	if (on_rq) {
+	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
-		inc_cpu_load(rq, se->load.weight);
-	}
+
+	spin_unlock_irq(&rq->lock);
 }
 
 static DEFINE_MUTEX(shares_mutex);
@@ -7867,18 +7669,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	int i;
 	unsigned long flags;
 
+	/*
+	 * A weight of 0 or 1 can cause arithmetics problems.
+	 * (The default weight is 1024 - so there's no practical
+	 * limitation from this.)
+	 */
+	if (shares < 2)
+		shares = 2;
+
 	mutex_lock(&shares_mutex);
 	if (tg->shares == shares)
 		goto done;
 
-	if (shares < MIN_GROUP_SHARES)
-		shares = MIN_GROUP_SHARES;
-
-	/*
-	 * Prevent any load balance activity (rebalance_shares,
-	 * load_balance_fair) from referring to this group first,
-	 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
-	 */
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i)
 		unregister_fair_sched_group(tg, i);
@@ -7892,11 +7694,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * w/o tripping rebalance_share or load_balance_fair.
 	 */
 	tg->shares = shares;
-	for_each_possible_cpu(i) {
-		spin_lock_irq(&cpu_rq(i)->lock);
+	for_each_possible_cpu(i)
 		set_se_shares(tg->se[i], shares);
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
 
 	/*
 	 * Enable load balance activity on this group, by inserting it back on
@@ -7928,9 +7727,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
 	if (runtime == RUNTIME_INF)
 		return 1ULL << 16;
 
-	runtime *= (1ULL << 16);
-	div64_64(runtime, period);
-	return runtime;
+	return div64_64(runtime << 16, period);
 }
 
 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
@@ -7954,25 +7751,40 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 	return total + to_ratio(period, runtime) < global_ratio;
 }
 
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
+{
+	struct task_struct *g, *p;
+	do_each_thread(g, p) {
+		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+			return 1;
+	} while_each_thread(g, p);
+	return 0;
+}
+
 int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
 	u64 rt_runtime, rt_period;
 	int err = 0;
 
-	rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+	rt_period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
 	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
 	if (rt_runtime_us == -1)
-		rt_runtime = rt_period;
+		rt_runtime = RUNTIME_INF;
 
 	mutex_lock(&rt_constraints_mutex);
+	read_lock(&tasklist_lock);
+	if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) {
+		err = -EBUSY;
+		goto unlock;
+	}
 	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
 		err = -EINVAL;
 		goto unlock;
 	}
-	if (rt_runtime_us == -1)
-		rt_runtime = RUNTIME_INF;
 	tg->rt_runtime = rt_runtime;
  unlock:
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return err;