author	Gregory Haskins <ghaskins@novell.com>	2008-01-25 15:08:18 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-25 15:08:18 -0500
commit	57d885fea0da0e9541d7730a9e1dcf734981a173 (patch)
tree	80f08ea6ed506e2aac30c89b8ae1eee7f008a378 /kernel
parent	7f51f298204ec0528422cd9b23feac12612c5665 (diff)
sched: add sched-domain roots
We add the notion of a root-domain which will be used later to rescope
global variables to per-domain variables. Each exclusive cpuset
essentially defines an island domain by fully partitioning the member
cpus from any other cpuset. However, we currently still maintain some
policy/state as global variables which transcend all cpusets. Consider,
for instance, rt-overload state.

Whenever a new exclusive cpuset is created, we also create a new
root-domain object and move each cpu member to the root-domain's span.
By default the system creates a single root-domain with all cpus as
members (mimicking the global state we have today).

We add some plumbing for storing class specific data in our root-domain.
Whenever a RQ is switching root-domains (because of repartitioning) we
give each sched_class the opportunity to remove any state from its old
domain and add state to the new one. This logic doesn't have any clients
yet but it will later in the series.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Christoph Lameter <clameter@sgi.com>
CC: Paul Jackson <pj@sgi.com>
CC: Simon Derr <simon.derr@bull.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
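As a rough illustration of the refcounting and per-class hook plumbing described above, here is a minimal userspace sketch (not kernel code). root_domain, rq and rq_attach_root mirror the names used in the patch, while class_leave_domain()/class_join_domain() are hypothetical stand-ins for the sched_class ->leave_domain/->join_domain hooks, which have no clients yet in this series.

/*
 * Illustrative userspace model only -- not kernel code.  It mimics the
 * refcounting in rq_attach_root() and the (currently clientless)
 * leave_domain/join_domain hook points.
 */
#include <stdio.h>
#include <stdlib.h>

struct root_domain {
	int refcount;			/* runqueues referencing this domain */
};

struct rq {
	int cpu;
	struct root_domain *rd;		/* root-domain this runqueue belongs to */
};

/* Hypothetical stand-ins for sched_class->leave_domain()/->join_domain(). */
static void class_leave_domain(struct rq *rq)
{
	printf("cpu%d: class state removed from old root-domain\n", rq->cpu);
}

static void class_join_domain(struct rq *rq)
{
	printf("cpu%d: class state added to new root-domain\n", rq->cpu);
}

/* Mirrors rq_attach_root(): drop the old domain, take a reference on the new one. */
static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
	if (rq->rd) {
		struct root_domain *old_rd = rq->rd;

		class_leave_domain(rq);
		if (--old_rd->refcount == 0)
			free(old_rd);
	}

	rd->refcount++;
	rq->rd = rd;
	class_join_domain(rq);
}

int main(void)
{
	/* def_root_domain starts with refcount 1, so it is never freed. */
	static struct root_domain def_root_domain = { .refcount = 1 };
	struct rq rq0 = { .cpu = 0 }, rq1 = { .cpu = 1 };
	struct root_domain *isolated;

	/* Boot: every runqueue is attached to the default root-domain. */
	rq_attach_root(&rq0, &def_root_domain);
	rq_attach_root(&rq1, &def_root_domain);

	/* Repartitioning: cpu1 moves into a freshly allocated root-domain. */
	isolated = calloc(1, sizeof(*isolated));
	if (!isolated)
		return 1;
	rq_attach_root(&rq1, isolated);

	printf("def_root_domain.refcount=%d isolated->refcount=%d\n",
	       def_root_domain.refcount, isolated->refcount);
	return 0;
}

The same refcounting in the real patch lets a dynamically allocated root-domain be kfree()'d once the last runqueue detaches from it during repartitioning, while def_root_domain's initial refcount of 1 keeps the statically allocated default domain alive for the lifetime of the system.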
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	121
1 file changed, 118 insertions, 3 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 36bd8ff2a669..34b7d721d735 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -349,6 +349,28 @@ struct rt_rq {
 	int overloaded;
 };
 
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+struct root_domain {
+	atomic_t refcount;
+	cpumask_t span;
+	cpumask_t online;
+};
+
+static struct root_domain def_root_domain;
+
+#endif
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -406,6 +428,7 @@ struct rq {
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
+	struct root_domain *rd;
 	struct sched_domain *sd;
 
 	/* For active balancing */
@@ -5550,6 +5573,15 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE_FROZEN:
 		/* Strictly unnecessary, as first user will wake it. */
 		wake_up_process(cpu_rq(cpu)->migration_thread);
+
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_set(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5600,6 +5632,17 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
+
+	case CPU_DOWN_PREPARE:
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_clear(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
+		break;
 #endif
 	}
 	return NOTIFY_OK;
@@ -5788,11 +5831,69 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+static void rq_attach_root(struct rq *rq, struct root_domain *rd)
+{
+	unsigned long flags;
+	const struct sched_class *class;
+
+	spin_lock_irqsave(&rq->lock, flags);
+
+	if (rq->rd) {
+		struct root_domain *old_rd = rq->rd;
+
+		for (class = sched_class_highest; class; class = class->next)
+			if (class->leave_domain)
+				class->leave_domain(rq);
+
+		if (atomic_dec_and_test(&old_rd->refcount))
+			kfree(old_rd);
+	}
+
+	atomic_inc(&rd->refcount);
+	rq->rd = rd;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->join_domain)
+			class->join_domain(rq);
+
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void init_rootdomain(struct root_domain *rd, const cpumask_t *map)
+{
+	memset(rd, 0, sizeof(*rd));
+
+	rd->span = *map;
+	cpus_and(rd->online, rd->span, cpu_online_map);
+}
+
+static void init_defrootdomain(void)
+{
+	cpumask_t cpus = CPU_MASK_ALL;
+
+	init_rootdomain(&def_root_domain, &cpus);
+	atomic_set(&def_root_domain.refcount, 1);
+}
+
+static struct root_domain *alloc_rootdomain(const cpumask_t *map)
+{
+	struct root_domain *rd;
+
+	rd = kmalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return NULL;
+
+	init_rootdomain(rd, map);
+
+	return rd;
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
  */
-static void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void cpu_attach_domain(struct sched_domain *sd,
+			      struct root_domain *rd, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
@@ -5817,6 +5918,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 
 	sched_domain_debug(sd, cpu);
 
+	rq_attach_root(rq, rd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -6185,6 +6287,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
+	struct root_domain *rd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
@@ -6201,6 +6304,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
 #endif
 
+	rd = alloc_rootdomain(cpu_map);
+	if (!rd) {
+		printk(KERN_WARNING "Cannot alloc root domain\n");
+		return -ENOMEM;
+	}
+
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
@@ -6417,7 +6526,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
-		cpu_attach_domain(sd, i);
+		cpu_attach_domain(sd, rd, i);
 	}
 
 	return 0;
@@ -6475,7 +6584,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	unregister_sched_domain_sysctl();
 
 	for_each_cpu_mask(i, *cpu_map)
-		cpu_attach_domain(NULL, i);
+		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
 	arch_destroy_sched_domains(cpu_map);
 }
@@ -6727,6 +6836,10 @@ void __init sched_init(void)
 	int highest_cpu = 0;
 	int i, j;
 
+#ifdef CONFIG_SMP
+	init_defrootdomain();
+#endif
+
 	for_each_possible_cpu(i) {
 		struct rt_prio_array *array;
 		struct rq *rq;
@@ -6765,6 +6878,8 @@ void __init sched_init(void)
 			rq->cpu_load[j] = 0;
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
+		rq->rd = NULL;
+		rq_attach_root(rq, &def_root_domain);
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;