 Documentation/scheduler/sched-domains.txt | 32 +++++++++++++++++++++++---------
 kernel/sched.c                            | 11 +++++++++++
 kernel/sched_fair.c                       |  5 +++--
 3 files changed, 37 insertions(+), 11 deletions(-)
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
index 373ceacc367..b7ee379b651 100644
--- a/Documentation/scheduler/sched-domains.txt
+++ b/Documentation/scheduler/sched-domains.txt
@@ -1,8 +1,7 @@
-Each CPU has a "base" scheduling domain (struct sched_domain). These are
-accessed via cpu_sched_domain(i) and this_sched_domain() macros. The domain
+Each CPU has a "base" scheduling domain (struct sched_domain). The domain
 hierarchy is built from these base domains via the ->parent pointer. ->parent
-MUST be NULL terminated, and domain structures should be per-CPU as they
-are locklessly updated.
+MUST be NULL terminated, and domain structures should be per-CPU as they are
+locklessly updated.
 
 Each scheduling domain spans a number of CPUs (stored in the ->span field).
 A domain's span MUST be a superset of it child's span (this restriction could
@@ -26,11 +25,26 @@ is treated as one entity. The load of a group is defined as the sum of the
 load of each of its member CPUs, and only when the load of a group becomes
 out of balance are tasks moved between groups.
 
-In kernel/sched.c, rebalance_tick is run periodically on each CPU. This
-function takes its CPU's base sched domain and checks to see if has reached
-its rebalance interval. If so, then it will run load_balance on that domain.
-rebalance_tick then checks the parent sched_domain (if it exists), and the
-parent of the parent and so forth.
+In kernel/sched.c, trigger_load_balance() is run periodically on each CPU
+through scheduler_tick(). It raises a softirq after the next regularly scheduled
+rebalancing event for the current runqueue has arrived. The actual load
+balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
+in softirq context (SCHED_SOFTIRQ).
+
+The latter function takes two arguments: the current CPU and whether it was idle
+at the time the scheduler_tick() happened and iterates over all sched domains
+our CPU is on, starting from its base domain and going up the ->parent chain.
+While doing that, it checks to see if the current domain has exhausted its
+rebalance interval. If so, it runs load_balance() on that domain. It then checks
+the parent sched_domain (if it exists), and the parent of the parent and so
+forth.
+
+Initially, load_balance() finds the busiest group in the current sched domain.
+If it succeeds, it looks for the busiest runqueue of all the CPUs' runqueues in
+that group. If it manages to find such a runqueue, it locks both our initial
+CPU's runqueue and the newly found busiest one and starts moving tasks from it
+to our runqueue. The exact number of tasks amounts to an imbalance previously
+computed while iterating over this sched domain's groups.
 
 *** Implementing sched domains ***
 The "base" domain will "span" the first level of the hierarchy. In the case
diff --git a/kernel/sched.c b/kernel/sched.c
index f592ce6f861..a8845516ace 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5011,6 +5011,17 @@ recheck:
 		return -EINVAL;
 	}
 
+	/*
+	 * If not changing anything there's no need to proceed further:
+	 */
+	if (unlikely(policy == p->policy && (!rt_policy(policy) ||
+			param->sched_priority == p->rt_priority))) {
+
+		__task_rq_unlock(rq);
+		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		return 0;
+	}
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (user) {
 		/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3f7ec9e27ee..c7ec5c8e7b4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -22,6 +22,7 @@
 
 #include <linux/latencytop.h>
 #include <linux/sched.h>
+#include <linux/cpumask.h>
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -3850,8 +3851,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		interval = msecs_to_jiffies(interval);
 		if (unlikely(!interval))
 			interval = 1;
-		if (interval > HZ*NR_CPUS/10)
-			interval = HZ*NR_CPUS/10;
+		if (interval > HZ*num_online_cpus()/10)
+			interval = HZ*num_online_cpus()/10;
 
 		need_serialize = sd->flags & SD_SERIALIZE;
 
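The kernel/sched_fair.c hunk caps the per-domain rebalance interval using the number of CPUs actually online (num_online_cpus(), hence the new cpumask.h include) rather than the compile-time NR_CPUS, so a kernel built for many possible CPUs but booted on a few does not stretch the interval needlessly. A standalone sketch of the clamp follows; HZ is hard-coded here purely as an assumption.

#include <stdio.h>

#define HZ 250	/* assumed tick rate for the example */

static unsigned long clamp_interval(unsigned long interval,
				    unsigned int online_cpus)
{
	/* Cap scales with the CPUs actually online, not NR_CPUS. */
	unsigned long max = (unsigned long)HZ * online_cpus / 10;

	if (!interval)
		interval = 1;
	if (interval > max)
		interval = max;
	return interval;
}

int main(void)
{
	/* With 4 CPUs online the cap is 250 * 4 / 10 = 100 jiffies. */
	printf("%lu\n", clamp_interval(500, 4));	/* 100 */
	printf("%lu\n", clamp_interval(50, 4));		/* 50 */
	return 0;
}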
