Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c | 304
 1 files changed, 268 insertions, 36 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6530a27052f3..14c447ae5d53 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -626,6 +626,10 @@ struct rq {
 	struct list_head migration_queue;
 #endif
 
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
@@ -1724,6 +1728,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 }
 #endif
 
+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2497,6 +2503,17 @@ out:
 	return success;
 }
 
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_ALL, 0);
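
For context on the kernel-doc added above: wake_up_process() is normally paired with a sleeper that sets its own task state and then blocks. A minimal sketch of that pairing, assuming only the standard set_current_state()/schedule() APIs — the names sleeper, done, sleeper_fn and wake_sleeper are illustrative and not part of this patch:

/*
 * Illustrative sleeper/waker pairing; not part of this patch.  The
 * "implies a write memory barrier ... if and only if a task is woken"
 * note above is what lets the waker publish 'done' before the wakeup.
 */
static struct task_struct *sleeper;	/* hypothetical */
static int done;			/* hypothetical condition */

static int sleeper_fn(void *unused)
{
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (done)			/* re-check after setting state */
			break;
		schedule();			/* sleep until woken */
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static void wake_sleeper(void)
{
	done = 1;			/* publish the condition ...        */
	wake_up_process(sleeper);	/* ... then wake the sleeping task  */
}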
@@ -2805,7 +2822,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
 	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch(prev);
 
 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
@@ -2895,19 +2912,72 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:  pointer to dest load array
+ * @offset: offset to add
+ * @shift:  shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 {
-	unsigned long i, running = 0, uninterruptible = 0;
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}
 
-	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
-	}
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	return load >> FSHIFT;
+}
 
-	if (unlikely((long)uninterruptible < 0))
-		uninterruptible = 0;
+/*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+	unsigned long upd = calc_load_update + 10;
+	long active;
+
+	if (time_before(jiffies, upd))
+		return;
+
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
 
-	return running + uninterruptible;
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+	long nr_active, delta;
+
+	nr_active = this_rq->nr_running;
+	nr_active += (long) this_rq->nr_uninterruptible;
+
+	if (nr_active != this_rq->calc_load_active) {
+		delta = nr_active - this_rq->calc_load_active;
+		this_rq->calc_load_active = nr_active;
+		atomic_long_add(delta, &calc_load_tasks);
+	}
 }
 
 /*
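
The new avenrun update is ordinary fixed-point exponential averaging: every LOAD_FREQ interval the current count of runnable plus uninterruptible tasks is blended into each average with a per-window decay factor. The arithmetic can be checked in user space; the sketch below transcribes calc_load() from the hunk above and uses the FSHIFT/FIXED_1/EXP_* constants as defined in include/linux/sched.h — the simulation loop and the sample task count are made up for illustration:

#include <stdio.h>

#define FSHIFT   11			/* bits of fixed-point precision */
#define FIXED_1  (1 << FSHIFT)		/* 1.0 in fixed point */
#define EXP_1    1884			/* 1/exp(5s/1min)  in fixed point */
#define EXP_5    2014			/* 1/exp(5s/5min)  */
#define EXP_15   2037			/* 1/exp(5s/15min) */

/* Same arithmetic as calc_load() in the hunk above. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avenrun[3] = { 0, 0, 0 };
	unsigned long active = 3 * FIXED_1;	/* pretend 3 tasks are active */
	int i;

	/* One update per LOAD_FREQ (~5s); simulate roughly a minute. */
	for (i = 0; i < 12; i++) {
		avenrun[0] = calc_load(avenrun[0], EXP_1, active);
		avenrun[1] = calc_load(avenrun[1], EXP_5, active);
		avenrun[2] = calc_load(avenrun[2], EXP_15, active);
	}

	/* Integer part and two decimals, the way load averages are shown. */
	for (i = 0; i < 3; i++)
		printf("%lu.%02lu ", avenrun[i] >> FSHIFT,
		       ((avenrun[i] & (FIXED_1 - 1)) * 100) >> FSHIFT);
	printf("\n");
	return 0;
}

In the kernel, get_avenrun() hands this same avenrun[] array to readers such as /proc/loadavg, which apply the same integer/fraction split when printing.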
@@ -2938,6 +3008,11 @@ static void update_cpu_load(struct rq *this_rq)
 		new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
+
+	if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+		this_rq->calc_load_update += LOAD_FREQ;
+		calc_load_account_active(this_rq);
+	}
 }
 
 #ifdef CONFIG_SMP
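
The hook added to update_cpu_load() is what replaces the old nr_active() walk over every online CPU: each runqueue now folds only the change in its own runnable + uninterruptible count into the global calc_load_tasks, at most once per LOAD_FREQ. A toy user-space model of that delta folding — the names are hypothetical and C11 atomics stand in for the kernel's atomic_long_t:

#include <stdatomic.h>

/* Global counter, analogous to calc_load_tasks in the patch. */
static atomic_long calc_load_tasks;

struct toy_rq {
	long nr_running;
	long nr_uninterruptible;
	long calc_load_active;		/* last value this CPU folded in */
};

/* Each CPU publishes only the delta since its last report, so the global
 * counter stays equal to the sum of per-CPU values without anyone having
 * to walk all CPUs at sampling time. */
static void fold_active(struct toy_rq *rq)
{
	long nr_active = rq->nr_running + rq->nr_uninterruptible;

	if (nr_active != rq->calc_load_active) {
		atomic_fetch_add(&calc_load_tasks,
				 nr_active - rq->calc_load_active);
		rq->calc_load_active = nr_active;
	}
}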
@@ -4279,10 +4354,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 static struct {
 	atomic_t load_balancer;
 	cpumask_var_t cpu_mask;
+	cpumask_var_t ilb_grp_nohz_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu:	The cpu whose lowest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the lowest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd)
+		if (sd && (sd->flags & flag))
+			break;
+
+	return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu:	The cpu whose domains we're iterating over.
+ * @sd:		variable holding the value of the power_savings_sd
+ *		for cpu.
+ * @flag:	The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+	for (sd = lowest_flag_domain(cpu, flag); \
+		(sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group:	group to be checked for semi-idleness
+ *
+ * Returns:	1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi idle if it has atleast one idle-CPU
+ * and atleast one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+	cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+					sched_group_cpus(ilb_group));
+
+	/*
+	 * A sched_group is semi-idle when it has atleast one busy cpu
+	 * and atleast one idle cpu.
+	 */
+	if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+		return 0;
+
+	if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+		return 0;
+
+	return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu:	The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns:	Returns the id of the idle load balancer if it exists,
+ *		Else, returns >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpu's which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+	struct sched_domain *sd;
+	struct sched_group *ilb_group;
+
+	/*
+	 * Have idle load balancer selection from semi-idle packages only
+	 * when power-aware load balancing is enabled
+	 */
+	if (!(sched_smt_power_savings || sched_mc_power_savings))
+		goto out_done;
+
+	/*
+	 * Optimize for the case when we have no idle CPUs or only one
+	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
+	 */
+	if (cpumask_weight(nohz.cpu_mask) < 2)
+		goto out_done;
+
+	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+		ilb_group = sd->groups;
+
+		do {
+			if (is_semi_idle_group(ilb_group))
+				return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+			ilb_group = ilb_group->next;
+
+		} while (ilb_group != sd->groups);
+	}
+
+out_done:
+	return cpumask_first(nohz.cpu_mask);
+}
+#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+	return cpumask_first(nohz.cpu_mask);
+}
+#endif
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
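
The heart of the new ilb nomination is is_semi_idle_group(): a group qualifies when it intersects the set of tick-stopped (nohz) CPUs but is not wholly contained in it. The same predicate on plain 64-bit masks, as a small self-contained sketch using toy bitmasks rather than the kernel cpumask API:

#include <stdint.h>
#include <stdio.h>

/* group_cpus: CPUs in the sched_group; nohz_cpus: CPUs whose tick is stopped.
 * Semi-idle == at least one idle (nohz) CPU and at least one busy CPU. */
static int is_semi_idle(uint64_t group_cpus, uint64_t nohz_cpus)
{
	uint64_t idle_in_group = group_cpus & nohz_cpus;

	if (idle_in_group == 0)			/* no idle CPU in the group */
		return 0;
	if (idle_in_group == group_cpus)	/* every CPU in the group is idle */
		return 0;
	return 1;
}

int main(void)
{
	/* CPUs 0-3 form the group; vary which CPUs have stopped their tick. */
	printf("%d\n", is_semi_idle(0x0f, 0x06));	/* 1: semi-idle   */
	printf("%d\n", is_semi_idle(0x0f, 0x0f));	/* 0: fully idle  */
	printf("%d\n", is_semi_idle(0x0f, 0x30));	/* 0: no idle CPU */
	return 0;
}

find_new_ilb() then returns the first idle CPU of the first such group, which is how idle load balancing stays on a package that already has busy CPUs instead of waking a completely idle one.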
@@ -4337,8 +4528,24 @@ int select_nohz_load_balancer(int stop_tick)
 			/* make me the ilb owner */
 			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
 				return 1;
-		} else if (atomic_read(&nohz.load_balancer) == cpu)
+		} else if (atomic_read(&nohz.load_balancer) == cpu) {
+			int new_ilb;
+
+			if (!(sched_smt_power_savings ||
+						sched_mc_power_savings))
+				return 1;
+			/*
+			 * Check to see if there is a more power-efficient
+			 * ilb.
+			 */
+			new_ilb = find_new_ilb(cpu);
+			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+				atomic_set(&nohz.load_balancer, -1);
+				resched_cpu(new_ilb);
+				return 0;
+			}
 			return 1;
+		}
 	} else {
 		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
@@ -4507,15 +4714,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 		}
 
 		if (atomic_read(&nohz.load_balancer) == -1) {
-			/*
-			 * simple selection for now: Nominate the
-			 * first cpu in the nohz list to be the next
-			 * ilb owner.
-			 *
-			 * TBD: Traverse the sched domains and nominate
-			 * the nearest cpu in the nohz.cpu_mask.
-			 */
-			int ilb = cpumask_first(nohz.cpu_mask);
+			int ilb = find_new_ilb(cpu);
 
 			if (ilb < nr_cpu_ids)
 				resched_cpu(ilb);
@@ -5046,13 +5245,15 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;
 
+need_resched:
+	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_qsctr_inc(cpu);
@@ -5109,15 +5310,9 @@ need_resched_nonpreemptible:
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
-}
 
-asmlinkage void __sched schedule(void)
-{
-need_resched:
-	preempt_disable();
-	__schedule();
 	preempt_enable_no_resched();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+	if (need_resched())
 		goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
@@ -5260,7 +5455,7 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
@@ -5280,6 +5475,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5318,6 +5516,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  * with each other. This can prevent needless bouncing between CPUs.
  *
  * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5354,6 +5555,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
  * awakened in the same order in which they were queued.
  *
  * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete(struct completion *x)
 {
@@ -5371,6 +5575,9 @@ EXPORT_SYMBOL(complete);
  * @x:  holds the state of this particular completion
 *
 * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
 */
 void complete_all(struct completion *x)
 {
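
The same wakeup-barrier note is attached to complete() and complete_all() above. As a reminder of the pattern these back, here is a hedged sketch using the standard completion API from <linux/completion.h>; start_async_work() is a made-up placeholder, not a real interface:

/* Illustrative only; not part of this patch. */
static void wait_side(void)
{
	DECLARE_COMPLETION_ONSTACK(done);

	start_async_work(&done);	/* hypothetical: hand 'done' to a worker */
	wait_for_completion(&done);	/* block until the worker calls complete() */
}

static void signal_side(struct completion *done)
{
	/* ... finish the work the waiter depends on ... */
	complete(done);			/* wakes one waiter; complete_all() would
					 * wake them all.  Per the note above, the
					 * wakeup implies a write barrier when a
					 * task is actually woken, so the work done
					 * before complete() is visible to the waiter. */
}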
@@ -6529,8 +6736,9 @@ void sched_show_task(struct task_struct *p)
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free,
-		task_pid_nr(p), task_pid_nr(p->real_parent));
+	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+		task_pid_nr(p), task_pid_nr(p->real_parent),
+		(unsigned long)task_thread_info(p)->flags);
 
 	show_stack(p, NULL);
 }
@@ -7009,6 +7217,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 
 	}
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7243,6 +7459,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
+		rq->calc_load_update = calc_load_update;
+		rq->calc_load_active = 0;
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
@@ -7282,7 +7500,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
-
+		calc_global_load_remove(rq);
 		/*
 		 * No need to migrate the tasks: it was best-effort if
 		 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -7792,8 +8010,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
 */
 struct static_sched_group {
 	struct sched_group sg;
@@ -7914,7 +8133,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j).sd;
-			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+			if (j != group_first_cpu(sd->groups)) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7992,7 +8211,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+	if (cpu != group_first_cpu(sd->groups))
 		return;
 
 	child = sd->child;
@@ -8977,6 +9196,8 @@ void __init sched_init(void)
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
+		rq->calc_load_active = 0;
+		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9084,6 +9305,9 @@ void __init sched_init(void)
 	 * when this runqueue becomes "idle".
 	 */
 	init_idle(current, smp_processor_id());
+
+	calc_load_update = jiffies + LOAD_FREQ;
+
 	/*
 	 * During early bootup we pretend to be a normal task:
 	 */
@@ -9094,6 +9318,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
 	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+	alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
 #endif
 	alloc_bootmem_cpumask_var(&cpu_isolated_map);
 #endif /* SMP */
@@ -9839,6 +10064,13 @@ static int sched_rt_global_constraints(void)
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;
 
+	/*
+	 * There's always some RT tasks in the root group
+	 * -- migration, kstopmachine etc..
+	 */
+	if (sysctl_sched_rt_runtime == 0)
+		return -EBUSY;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;