diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 302 |
1 files changed, 267 insertions, 35 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 9e0fd1ef1a47..076e403b9c88 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -630,6 +630,10 @@ struct rq { | |||
630 | struct list_head migration_queue; | 630 | struct list_head migration_queue; |
631 | #endif | 631 | #endif |
632 | 632 | ||
633 | /* calc_load related fields */ | ||
634 | unsigned long calc_load_update; | ||
635 | long calc_load_active; | ||
636 | |||
633 | #ifdef CONFIG_SCHED_HRTICK | 637 | #ifdef CONFIG_SCHED_HRTICK |
634 | #ifdef CONFIG_SMP | 638 | #ifdef CONFIG_SMP |
635 | int hrtick_csd_pending; | 639 | int hrtick_csd_pending; |
@@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1728 | } | 1732 | } |
1729 | #endif | 1733 | #endif |
1730 | 1734 | ||
1735 | static void calc_load_account_active(struct rq *this_rq); | ||
1736 | |||
1731 | #include "sched_stats.h" | 1737 | #include "sched_stats.h" |
1732 | #include "sched_idletask.c" | 1738 | #include "sched_idletask.c" |
1733 | #include "sched_fair.c" | 1739 | #include "sched_fair.c" |
@@ -2458,6 +2464,17 @@ out: | |||
2458 | return success; | 2464 | return success; |
2459 | } | 2465 | } |
2460 | 2466 | ||
2467 | /** | ||
2468 | * wake_up_process - Wake up a specific process | ||
2469 | * @p: The process to be woken up. | ||
2470 | * | ||
2471 | * Attempt to wake up the nominated process and move it to the set of runnable | ||
2472 | * processes. Returns 1 if the process was woken up, 0 if it was already | ||
2473 | * running. | ||
2474 | * | ||
2475 | * It may be assumed that this function implies a write memory barrier before | ||
2476 | * changing the task state if and only if any tasks are woken up. | ||
2477 | */ | ||
2461 | int wake_up_process(struct task_struct *p) | 2478 | int wake_up_process(struct task_struct *p) |
2462 | { | 2479 | { |
2463 | return try_to_wake_up(p, TASK_ALL, 0); | 2480 | return try_to_wake_up(p, TASK_ALL, 0); |
@@ -2856,19 +2873,72 @@ unsigned long nr_iowait(void) | |||
2856 | return sum; | 2873 | return sum; |
2857 | } | 2874 | } |
2858 | 2875 | ||
2859 | unsigned long nr_active(void) | 2876 | /* Variables and functions for calc_load */ |
2877 | static atomic_long_t calc_load_tasks; | ||
2878 | static unsigned long calc_load_update; | ||
2879 | unsigned long avenrun[3]; | ||
2880 | EXPORT_SYMBOL(avenrun); | ||
2881 | |||
2882 | /** | ||
2883 | * get_avenrun - get the load average array | ||
2884 | * @loads: pointer to dest load array | ||
2885 | * @offset: offset to add | ||
2886 | * @shift: shift count to shift the result left | ||
2887 | * | ||
2888 | * These values are estimates at best, so no need for locking. | ||
2889 | */ | ||
2890 | void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | ||
2860 | { | 2891 | { |
2861 | unsigned long i, running = 0, uninterruptible = 0; | 2892 | loads[0] = (avenrun[0] + offset) << shift; |
2893 | loads[1] = (avenrun[1] + offset) << shift; | ||
2894 | loads[2] = (avenrun[2] + offset) << shift; | ||
2895 | } | ||
2862 | 2896 | ||
2863 | for_each_online_cpu(i) { | 2897 | static unsigned long |
2864 | running += cpu_rq(i)->nr_running; | 2898 | calc_load(unsigned long load, unsigned long exp, unsigned long active) |
2865 | uninterruptible += cpu_rq(i)->nr_uninterruptible; | 2899 | { |
2866 | } | 2900 | load *= exp; |
2901 | load += active * (FIXED_1 - exp); | ||
2902 | return load >> FSHIFT; | ||
2903 | } | ||
2867 | 2904 | ||
2868 | if (unlikely((long)uninterruptible < 0)) | 2905 | /* |
2869 | uninterruptible = 0; | 2906 | * calc_global_load - update the avenrun load estimates 10 ticks after the |
2907 | * CPUs have updated calc_load_tasks. | ||
2908 | */ | ||
2909 | void calc_global_load(void) | ||
2910 | { | ||
2911 | unsigned long upd = calc_load_update + 10; | ||
2912 | long active; | ||
2913 | |||
2914 | if (time_before(jiffies, upd)) | ||
2915 | return; | ||
2916 | |||
2917 | active = atomic_long_read(&calc_load_tasks); | ||
2918 | active = active > 0 ? active * FIXED_1 : 0; | ||
2870 | 2919 | ||
2871 | return running + uninterruptible; | 2920 | avenrun[0] = calc_load(avenrun[0], EXP_1, active); |
2921 | avenrun[1] = calc_load(avenrun[1], EXP_5, active); | ||
2922 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); | ||
2923 | |||
2924 | calc_load_update += LOAD_FREQ; | ||
2925 | } | ||
2926 | |||
2927 | /* | ||
2928 | * Either called from update_cpu_load() or from a cpu going idle | ||
2929 | */ | ||
2930 | static void calc_load_account_active(struct rq *this_rq) | ||
2931 | { | ||
2932 | long nr_active, delta; | ||
2933 | |||
2934 | nr_active = this_rq->nr_running; | ||
2935 | nr_active += (long) this_rq->nr_uninterruptible; | ||
2936 | |||
2937 | if (nr_active != this_rq->calc_load_active) { | ||
2938 | delta = nr_active - this_rq->calc_load_active; | ||
2939 | this_rq->calc_load_active = nr_active; | ||
2940 | atomic_long_add(delta, &calc_load_tasks); | ||
2941 | } | ||
2872 | } | 2942 | } |
2873 | 2943 | ||
2874 | /* | 2944 | /* |
@@ -2899,6 +2969,11 @@ static void update_cpu_load(struct rq *this_rq) | |||
2899 | new_load += scale-1; | 2969 | new_load += scale-1; |
2900 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; | 2970 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; |
2901 | } | 2971 | } |
2972 | |||
2973 | if (time_after_eq(jiffies, this_rq->calc_load_update)) { | ||
2974 | this_rq->calc_load_update += LOAD_FREQ; | ||
2975 | calc_load_account_active(this_rq); | ||
2976 | } | ||
2902 | } | 2977 | } |
2903 | 2978 | ||
2904 | #ifdef CONFIG_SMP | 2979 | #ifdef CONFIG_SMP |
@@ -4240,10 +4315,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
4240 | static struct { | 4315 | static struct { |
4241 | atomic_t load_balancer; | 4316 | atomic_t load_balancer; |
4242 | cpumask_var_t cpu_mask; | 4317 | cpumask_var_t cpu_mask; |
4318 | cpumask_var_t ilb_grp_nohz_mask; | ||
4243 | } nohz ____cacheline_aligned = { | 4319 | } nohz ____cacheline_aligned = { |
4244 | .load_balancer = ATOMIC_INIT(-1), | 4320 | .load_balancer = ATOMIC_INIT(-1), |
4245 | }; | 4321 | }; |
4246 | 4322 | ||
4323 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
4324 | /** | ||
4325 | * lowest_flag_domain - Return lowest sched_domain containing flag. | ||
4326 | * @cpu: The cpu whose lowest level of sched domain is to | ||
4327 | * be returned. | ||
4328 | * @flag: The flag to check for the lowest sched_domain | ||
4329 | * for the given cpu. | ||
4330 | * | ||
4331 | * Returns the lowest sched_domain of a cpu which contains the given flag. | ||
4332 | */ | ||
4333 | static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) | ||
4334 | { | ||
4335 | struct sched_domain *sd; | ||
4336 | |||
4337 | for_each_domain(cpu, sd) | ||
4338 | if (sd && (sd->flags & flag)) | ||
4339 | break; | ||
4340 | |||
4341 | return sd; | ||
4342 | } | ||
4343 | |||
4344 | /** | ||
4345 | * for_each_flag_domain - Iterates over sched_domains containing the flag. | ||
4346 | * @cpu: The cpu whose domains we're iterating over. | ||
4347 | * @sd: variable holding the value of the power_savings_sd | ||
4348 | * for cpu. | ||
4349 | * @flag: The flag to filter the sched_domains to be iterated. | ||
4350 | * | ||
4351 | * Iterates over all the scheduler domains for a given cpu that has the 'flag' | ||
4352 | * set, starting from the lowest sched_domain to the highest. | ||
4353 | */ | ||
4354 | #define for_each_flag_domain(cpu, sd, flag) \ | ||
4355 | for (sd = lowest_flag_domain(cpu, flag); \ | ||
4356 | (sd && (sd->flags & flag)); sd = sd->parent) | ||
4357 | |||
4358 | /** | ||
4359 | * is_semi_idle_group - Checks if the given sched_group is semi-idle. | ||
4360 | * @ilb_group: group to be checked for semi-idleness | ||
4361 | * | ||
4362 | * Returns: 1 if the group is semi-idle. 0 otherwise. | ||
4363 | * | ||
4364 | * We define a sched_group to be semi-idle if it has at least one idle CPU | ||
4365 | * and at least one non-idle CPU. This helper function checks if the given | ||
4366 | * sched_group is semi-idle or not. | ||
4367 | */ | ||
4368 | static inline int is_semi_idle_group(struct sched_group *ilb_group) | ||
4369 | { | ||
4370 | cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask, | ||
4371 | sched_group_cpus(ilb_group)); | ||
4372 | |||
4373 | /* | ||
4374 | * A sched_group is semi-idle when it has at least one busy cpu | ||
4375 | * and at least one idle cpu. | ||
4376 | */ | ||
4377 | if (cpumask_empty(nohz.ilb_grp_nohz_mask)) | ||
4378 | return 0; | ||
4379 | |||
4380 | if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group))) | ||
4381 | return 0; | ||
4382 | |||
4383 | return 1; | ||
4384 | } | ||
4385 | /** | ||
4386 | * find_new_ilb - Finds the optimum idle load balancer for nomination. | ||
4387 | * @cpu: The cpu which is nominating a new idle_load_balancer. | ||
4388 | * | ||
4389 | * Returns: The id of the idle load balancer if it exists, | ||
4390 | * else a value >= nr_cpu_ids. | ||
4391 | * | ||
4392 | * This algorithm picks the idle load balancer such that it belongs to a | ||
4393 | * semi-idle powersavings sched_domain. The idea is to try and avoid | ||
4394 | * completely idle packages/cores just for the purpose of idle load balancing | ||
4395 | * when there are other idle cpus which are better suited for that job. | ||
4396 | */ | ||
4397 | static int find_new_ilb(int cpu) | ||
4398 | { | ||
4399 | struct sched_domain *sd; | ||
4400 | struct sched_group *ilb_group; | ||
4401 | |||
4402 | /* | ||
4403 | * Have idle load balancer selection from semi-idle packages only | ||
4404 | * when power-aware load balancing is enabled | ||
4405 | */ | ||
4406 | if (!(sched_smt_power_savings || sched_mc_power_savings)) | ||
4407 | goto out_done; | ||
4408 | |||
4409 | /* | ||
4410 | * Optimize for the case when we have no idle CPUs or only one | ||
4411 | * idle CPU. Don't walk the sched_domain hierarchy in such cases | ||
4412 | */ | ||
4413 | if (cpumask_weight(nohz.cpu_mask) < 2) | ||
4414 | goto out_done; | ||
4415 | |||
4416 | for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { | ||
4417 | ilb_group = sd->groups; | ||
4418 | |||
4419 | do { | ||
4420 | if (is_semi_idle_group(ilb_group)) | ||
4421 | return cpumask_first(nohz.ilb_grp_nohz_mask); | ||
4422 | |||
4423 | ilb_group = ilb_group->next; | ||
4424 | |||
4425 | } while (ilb_group != sd->groups); | ||
4426 | } | ||
4427 | |||
4428 | out_done: | ||
4429 | return cpumask_first(nohz.cpu_mask); | ||
4430 | } | ||
4431 | #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ | ||
4432 | static inline int find_new_ilb(int call_cpu) | ||
4433 | { | ||
4434 | return cpumask_first(nohz.cpu_mask); | ||
4435 | } | ||
4436 | #endif | ||
4437 | |||
4247 | /* | 4438 | /* |
4248 | * This routine will try to nominate the ilb (idle load balancing) | 4439 | * This routine will try to nominate the ilb (idle load balancing) |
4249 | * owner among the cpus whose ticks are stopped. ilb owner will do the idle | 4440 | * owner among the cpus whose ticks are stopped. ilb owner will do the idle |
@@ -4298,8 +4489,24 @@ int select_nohz_load_balancer(int stop_tick) | |||
4298 | /* make me the ilb owner */ | 4489 | /* make me the ilb owner */ |
4299 | if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) | 4490 | if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) |
4300 | return 1; | 4491 | return 1; |
4301 | } else if (atomic_read(&nohz.load_balancer) == cpu) | 4492 | } else if (atomic_read(&nohz.load_balancer) == cpu) { |
4493 | int new_ilb; | ||
4494 | |||
4495 | if (!(sched_smt_power_savings || | ||
4496 | sched_mc_power_savings)) | ||
4497 | return 1; | ||
4498 | /* | ||
4499 | * Check to see if there is a more power-efficient | ||
4500 | * ilb. | ||
4501 | */ | ||
4502 | new_ilb = find_new_ilb(cpu); | ||
4503 | if (new_ilb < nr_cpu_ids && new_ilb != cpu) { | ||
4504 | atomic_set(&nohz.load_balancer, -1); | ||
4505 | resched_cpu(new_ilb); | ||
4506 | return 0; | ||
4507 | } | ||
4302 | return 1; | 4508 | return 1; |
4509 | } | ||
4303 | } else { | 4510 | } else { |
4304 | if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) | 4511 | if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) |
4305 | return 0; | 4512 | return 0; |
@@ -4468,15 +4675,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) | |||
4468 | } | 4675 | } |
4469 | 4676 | ||
4470 | if (atomic_read(&nohz.load_balancer) == -1) { | 4677 | if (atomic_read(&nohz.load_balancer) == -1) { |
4471 | /* | 4678 | int ilb = find_new_ilb(cpu); |
4472 | * simple selection for now: Nominate the | ||
4473 | * first cpu in the nohz list to be the next | ||
4474 | * ilb owner. | ||
4475 | * | ||
4476 | * TBD: Traverse the sched domains and nominate | ||
4477 | * the nearest cpu in the nohz.cpu_mask. | ||
4478 | */ | ||
4479 | int ilb = cpumask_first(nohz.cpu_mask); | ||
4480 | 4679 | ||
4481 | if (ilb < nr_cpu_ids) | 4680 | if (ilb < nr_cpu_ids) |
4482 | resched_cpu(ilb); | 4681 | resched_cpu(ilb); |
@@ -5007,13 +5206,15 @@ pick_next_task(struct rq *rq) | |||
5007 | /* | 5206 | /* |
5008 | * schedule() is the main scheduler function. | 5207 | * schedule() is the main scheduler function. |
5009 | */ | 5208 | */ |
5010 | asmlinkage void __sched __schedule(void) | 5209 | asmlinkage void __sched schedule(void) |
5011 | { | 5210 | { |
5012 | struct task_struct *prev, *next; | 5211 | struct task_struct *prev, *next; |
5013 | unsigned long *switch_count; | 5212 | unsigned long *switch_count; |
5014 | struct rq *rq; | 5213 | struct rq *rq; |
5015 | int cpu; | 5214 | int cpu; |
5016 | 5215 | ||
5216 | need_resched: | ||
5217 | preempt_disable(); | ||
5017 | cpu = smp_processor_id(); | 5218 | cpu = smp_processor_id(); |
5018 | rq = cpu_rq(cpu); | 5219 | rq = cpu_rq(cpu); |
5019 | rcu_qsctr_inc(cpu); | 5220 | rcu_qsctr_inc(cpu); |
@@ -5070,15 +5271,9 @@ need_resched_nonpreemptible: | |||
5070 | 5271 | ||
5071 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 5272 | if (unlikely(reacquire_kernel_lock(current) < 0)) |
5072 | goto need_resched_nonpreemptible; | 5273 | goto need_resched_nonpreemptible; |
5073 | } | ||
5074 | 5274 | ||
5075 | asmlinkage void __sched schedule(void) | ||
5076 | { | ||
5077 | need_resched: | ||
5078 | preempt_disable(); | ||
5079 | __schedule(); | ||
5080 | preempt_enable_no_resched(); | 5275 | preempt_enable_no_resched(); |
5081 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 5276 | if (need_resched()) |
5082 | goto need_resched; | 5277 | goto need_resched; |
5083 | } | 5278 | } |
5084 | EXPORT_SYMBOL(schedule); | 5279 | EXPORT_SYMBOL(schedule); |
@@ -5221,7 +5416,7 @@ EXPORT_SYMBOL(default_wake_function); | |||
5221 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns | 5416 | * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns |
5222 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 5417 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
5223 | */ | 5418 | */ |
5224 | void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 5419 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
5225 | int nr_exclusive, int sync, void *key) | 5420 | int nr_exclusive, int sync, void *key) |
5226 | { | 5421 | { |
5227 | wait_queue_t *curr, *next; | 5422 | wait_queue_t *curr, *next; |
@@ -5241,6 +5436,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
5241 | * @mode: which threads | 5436 | * @mode: which threads |
5242 | * @nr_exclusive: how many wake-one or wake-many threads to wake up | 5437 | * @nr_exclusive: how many wake-one or wake-many threads to wake up |
5243 | * @key: is directly passed to the wakeup function | 5438 | * @key: is directly passed to the wakeup function |
5439 | * | ||
5440 | * It may be assumed that this function implies a write memory barrier before | ||
5441 | * changing the task state if and only if any tasks are woken up. | ||
5244 | */ | 5442 | */ |
5245 | void __wake_up(wait_queue_head_t *q, unsigned int mode, | 5443 | void __wake_up(wait_queue_head_t *q, unsigned int mode, |
5246 | int nr_exclusive, void *key) | 5444 | int nr_exclusive, void *key) |
@@ -5279,6 +5477,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) | |||
5279 | * with each other. This can prevent needless bouncing between CPUs. | 5477 | * with each other. This can prevent needless bouncing between CPUs. |
5280 | * | 5478 | * |
5281 | * On UP it can prevent extra preemption. | 5479 | * On UP it can prevent extra preemption. |
5480 | * | ||
5481 | * It may be assumed that this function implies a write memory barrier before | ||
5482 | * changing the task state if and only if any tasks are woken up. | ||
5282 | */ | 5483 | */ |
5283 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | 5484 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, |
5284 | int nr_exclusive, void *key) | 5485 | int nr_exclusive, void *key) |
@@ -5315,6 +5516,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ | |||
5315 | * awakened in the same order in which they were queued. | 5516 | * awakened in the same order in which they were queued. |
5316 | * | 5517 | * |
5317 | * See also complete_all(), wait_for_completion() and related routines. | 5518 | * See also complete_all(), wait_for_completion() and related routines. |
5519 | * | ||
5520 | * It may be assumed that this function implies a write memory barrier before | ||
5521 | * changing the task state if and only if any tasks are woken up. | ||
5318 | */ | 5522 | */ |
5319 | void complete(struct completion *x) | 5523 | void complete(struct completion *x) |
5320 | { | 5524 | { |
@@ -5332,6 +5536,9 @@ EXPORT_SYMBOL(complete); | |||
5332 | * @x: holds the state of this particular completion | 5536 | * @x: holds the state of this particular completion |
5333 | * | 5537 | * |
5334 | * This will wake up all threads waiting on this particular completion event. | 5538 | * This will wake up all threads waiting on this particular completion event. |
5539 | * | ||
5540 | * It may be assumed that this function implies a write memory barrier before | ||
5541 | * changing the task state if and only if any tasks are woken up. | ||
5335 | */ | 5542 | */ |
5336 | void complete_all(struct completion *x) | 5543 | void complete_all(struct completion *x) |
5337 | { | 5544 | { |
@@ -6490,8 +6697,9 @@ void sched_show_task(struct task_struct *p) | |||
6490 | #ifdef CONFIG_DEBUG_STACK_USAGE | 6697 | #ifdef CONFIG_DEBUG_STACK_USAGE |
6491 | free = stack_not_used(p); | 6698 | free = stack_not_used(p); |
6492 | #endif | 6699 | #endif |
6493 | printk(KERN_CONT "%5lu %5d %6d\n", free, | 6700 | printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
6494 | task_pid_nr(p), task_pid_nr(p->real_parent)); | 6701 | task_pid_nr(p), task_pid_nr(p->real_parent), |
6702 | (unsigned long)task_thread_info(p)->flags); | ||
6495 | 6703 | ||
6496 | show_stack(p, NULL); | 6704 | show_stack(p, NULL); |
6497 | } | 6705 | } |
@@ -6970,6 +7178,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu) | |||
6970 | 7178 | ||
6971 | } | 7179 | } |
6972 | } | 7180 | } |
7181 | |||
7182 | /* | ||
7183 | * remove the tasks which were accounted by rq from calc_load_tasks. | ||
7184 | */ | ||
7185 | static void calc_global_load_remove(struct rq *rq) | ||
7186 | { | ||
7187 | atomic_long_sub(rq->calc_load_active, &calc_load_tasks); | ||
7188 | } | ||
6973 | #endif /* CONFIG_HOTPLUG_CPU */ | 7189 | #endif /* CONFIG_HOTPLUG_CPU */ |
6974 | 7190 | ||
6975 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) | 7191 | #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) |
@@ -7204,6 +7420,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7204 | /* Update our root-domain */ | 7420 | /* Update our root-domain */ |
7205 | rq = cpu_rq(cpu); | 7421 | rq = cpu_rq(cpu); |
7206 | spin_lock_irqsave(&rq->lock, flags); | 7422 | spin_lock_irqsave(&rq->lock, flags); |
7423 | rq->calc_load_update = calc_load_update; | ||
7424 | rq->calc_load_active = 0; | ||
7207 | if (rq->rd) { | 7425 | if (rq->rd) { |
7208 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7426 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7209 | 7427 | ||
@@ -7243,7 +7461,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7243 | cpuset_unlock(); | 7461 | cpuset_unlock(); |
7244 | migrate_nr_uninterruptible(rq); | 7462 | migrate_nr_uninterruptible(rq); |
7245 | BUG_ON(rq->nr_running != 0); | 7463 | BUG_ON(rq->nr_running != 0); |
7246 | 7464 | calc_global_load_remove(rq); | |
7247 | /* | 7465 | /* |
7248 | * No need to migrate the tasks: it was best-effort if | 7466 | * No need to migrate the tasks: it was best-effort if |
7249 | * they didn't take sched_hotcpu_mutex. Just wake up | 7467 | * they didn't take sched_hotcpu_mutex. Just wake up |
@@ -7753,8 +7971,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
7753 | 7971 | ||
7754 | /* | 7972 | /* |
7755 | * The cpus mask in sched_group and sched_domain hangs off the end. | 7973 | * The cpus mask in sched_group and sched_domain hangs off the end. |
7756 | * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space | 7974 | * |
7757 | * for nr_cpu_ids < CONFIG_NR_CPUS. | 7975 | * ( See the comments in include/linux/sched.h:struct sched_group |
7976 | * and struct sched_domain. ) | ||
7758 | */ | 7977 | */ |
7759 | struct static_sched_group { | 7978 | struct static_sched_group { |
7760 | struct sched_group sg; | 7979 | struct sched_group sg; |
@@ -7875,7 +8094,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
7875 | struct sched_domain *sd; | 8094 | struct sched_domain *sd; |
7876 | 8095 | ||
7877 | sd = &per_cpu(phys_domains, j).sd; | 8096 | sd = &per_cpu(phys_domains, j).sd; |
7878 | if (j != cpumask_first(sched_group_cpus(sd->groups))) { | 8097 | if (j != group_first_cpu(sd->groups)) { |
7879 | /* | 8098 | /* |
7880 | * Only add "power" once for each | 8099 | * Only add "power" once for each |
7881 | * physical package. | 8100 | * physical package. |
@@ -7953,7 +8172,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
7953 | 8172 | ||
7954 | WARN_ON(!sd || !sd->groups); | 8173 | WARN_ON(!sd || !sd->groups); |
7955 | 8174 | ||
7956 | if (cpu != cpumask_first(sched_group_cpus(sd->groups))) | 8175 | if (cpu != group_first_cpu(sd->groups)) |
7957 | return; | 8176 | return; |
7958 | 8177 | ||
7959 | child = sd->child; | 8178 | child = sd->child; |
@@ -8938,6 +9157,8 @@ void __init sched_init(void) | |||
8938 | rq = cpu_rq(i); | 9157 | rq = cpu_rq(i); |
8939 | spin_lock_init(&rq->lock); | 9158 | spin_lock_init(&rq->lock); |
8940 | rq->nr_running = 0; | 9159 | rq->nr_running = 0; |
9160 | rq->calc_load_active = 0; | ||
9161 | rq->calc_load_update = jiffies + LOAD_FREQ; | ||
8941 | init_cfs_rq(&rq->cfs, rq); | 9162 | init_cfs_rq(&rq->cfs, rq); |
8942 | init_rt_rq(&rq->rt, rq); | 9163 | init_rt_rq(&rq->rt, rq); |
8943 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9164 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -9045,6 +9266,9 @@ void __init sched_init(void) | |||
9045 | * when this runqueue becomes "idle". | 9266 | * when this runqueue becomes "idle". |
9046 | */ | 9267 | */ |
9047 | init_idle(current, smp_processor_id()); | 9268 | init_idle(current, smp_processor_id()); |
9269 | |||
9270 | calc_load_update = jiffies + LOAD_FREQ; | ||
9271 | |||
9048 | /* | 9272 | /* |
9049 | * During early bootup we pretend to be a normal task: | 9273 | * During early bootup we pretend to be a normal task: |
9050 | */ | 9274 | */ |
@@ -9055,6 +9279,7 @@ void __init sched_init(void) | |||
9055 | #ifdef CONFIG_SMP | 9279 | #ifdef CONFIG_SMP |
9056 | #ifdef CONFIG_NO_HZ | 9280 | #ifdef CONFIG_NO_HZ |
9057 | alloc_bootmem_cpumask_var(&nohz.cpu_mask); | 9281 | alloc_bootmem_cpumask_var(&nohz.cpu_mask); |
9282 | alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); | ||
9058 | #endif | 9283 | #endif |
9059 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | 9284 | alloc_bootmem_cpumask_var(&cpu_isolated_map); |
9060 | #endif /* SMP */ | 9285 | #endif /* SMP */ |
@@ -9800,6 +10025,13 @@ static int sched_rt_global_constraints(void) | |||
9800 | if (sysctl_sched_rt_period <= 0) | 10025 | if (sysctl_sched_rt_period <= 0) |
9801 | return -EINVAL; | 10026 | return -EINVAL; |
9802 | 10027 | ||
10028 | /* | ||
10029 | * There's always some RT tasks in the root group | ||
10030 | * -- migration, kstopmachine etc.. | ||
10031 | */ | ||
10032 | if (sysctl_sched_rt_runtime == 0) | ||
10033 | return -EBUSY; | ||
10034 | |||
9803 | spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 10035 | spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
9804 | for_each_possible_cpu(i) { | 10036 | for_each_possible_cpu(i) { |
9805 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; | 10037 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; |