about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cpumask.h 6
-rw-r--r-- include/linux/cpuset.h 7
-rw-r--r-- init/main.c 7
-rw-r--r-- kernel/cpu.c 30
-rw-r--r-- kernel/cpuset.c 2
-rw-r--r-- kernel/sched.c 108
-rw-r--r-- kernel/sched_fair.c 3
-rw-r--r-- kernel/sched_rt.c 7
8 files changed, 99 insertions, 71 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c24875bd9c5b..d614d2472798 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -359,13 +359,14 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
359 359
360/* 360/*
361 * The following particular system cpumasks and operations manage 361 * The following particular system cpumasks and operations manage
362 * possible, present and online cpus. Each of them is a fixed size 362 * possible, present, active and online cpus. Each of them is a fixed size
363 * bitmap of size NR_CPUS. 363 * bitmap of size NR_CPUS.
364 * 364 *
365 * #ifdef CONFIG_HOTPLUG_CPU 365 * #ifdef CONFIG_HOTPLUG_CPU
366 * cpu_possible_map - has bit 'cpu' set iff cpu is populatable 366 * cpu_possible_map - has bit 'cpu' set iff cpu is populatable
367 * cpu_present_map - has bit 'cpu' set iff cpu is populated 367 * cpu_present_map - has bit 'cpu' set iff cpu is populated
368 * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler 368 * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
369 * cpu_active_map - has bit 'cpu' set iff cpu available to migration
369 * #else 370 * #else
370 * cpu_possible_map - has bit 'cpu' set iff cpu is populated 371 * cpu_possible_map - has bit 'cpu' set iff cpu is populated
371 * cpu_present_map - copy of cpu_possible_map 372 * cpu_present_map - copy of cpu_possible_map
@@ -416,6 +417,7 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
416extern cpumask_t cpu_possible_map; 417extern cpumask_t cpu_possible_map;
417extern cpumask_t cpu_online_map; 418extern cpumask_t cpu_online_map;
418extern cpumask_t cpu_present_map; 419extern cpumask_t cpu_present_map;
420extern cpumask_t cpu_active_map;
419 421
420#if NR_CPUS > 1 422#if NR_CPUS > 1
421#define num_online_cpus() cpus_weight(cpu_online_map) 423#define num_online_cpus() cpus_weight(cpu_online_map)
@@ -424,6 +426,7 @@ extern cpumask_t cpu_present_map;
424#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) 426#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
425#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) 427#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
426#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) 428#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
429#define cpu_active(cpu) cpu_isset((cpu), cpu_active_map)
427#else 430#else
428#define num_online_cpus() 1 431#define num_online_cpus() 1
429#define num_possible_cpus() 1 432#define num_possible_cpus() 1
@@ -431,6 +434,7 @@ extern cpumask_t cpu_present_map;
431#define cpu_online(cpu) ((cpu) == 0) 434#define cpu_online(cpu) ((cpu) == 0)
432#define cpu_possible(cpu) ((cpu) == 0) 435#define cpu_possible(cpu) ((cpu) == 0)
433#define cpu_present(cpu) ((cpu) == 0) 436#define cpu_present(cpu) ((cpu) == 0)
437#define cpu_active(cpu) ((cpu) == 0)
434#endif 438#endif
435 439
436#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) 440#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 038578362b47..e8f450c499b0 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(void);
78 78
79extern int current_cpuset_is_being_rebound(void); 79extern int current_cpuset_is_being_rebound(void);
80 80
81extern void rebuild_sched_domains(void);
82
81#else /* !CONFIG_CPUSETS */ 83#else /* !CONFIG_CPUSETS */
82 84
83static inline int cpuset_init_early(void) { return 0; } 85static inline int cpuset_init_early(void) { return 0; }
@@ -156,6 +158,11 @@ static inline int current_cpuset_is_being_rebound(void)
156 return 0; 158 return 0;
157} 159}
158 160
161static inline void rebuild_sched_domains(void)
162{
163 partition_sched_domains(0, NULL, NULL);
164}
165
159#endif /* !CONFIG_CPUSETS */ 166#endif /* !CONFIG_CPUSETS */
160 167
161#endif /* _LINUX_CPUSET_H */ 168#endif /* _LINUX_CPUSET_H */
diff --git a/init/main.c b/init/main.c
index edeace036fd9..dd25259530ea 100644
--- a/init/main.c
+++ b/init/main.c
@@ -415,6 +415,13 @@ static void __init smp_init(void)
415{ 415{
416 unsigned int cpu; 416 unsigned int cpu;
417 417
418 /*
419 * Set up the current CPU as possible to migrate to.
420 * The other ones will be done by cpu_up/cpu_down()
421 */
422 cpu = smp_processor_id();
423 cpu_set(cpu, cpu_active_map);
424
418 /* FIXME: This should be done in userspace --RR */ 425 /* FIXME: This should be done in userspace --RR */
419 for_each_present_cpu(cpu) { 426 for_each_present_cpu(cpu) {
420 if (num_online_cpus() >= setup_max_cpus) 427 if (num_online_cpus() >= setup_max_cpus)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab801..a1ac7ea245d7 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
64 cpu_hotplug.refcount = 0; 64 cpu_hotplug.refcount = 0;
65} 65}
66 66
67cpumask_t cpu_active_map;
68
67#ifdef CONFIG_HOTPLUG_CPU 69#ifdef CONFIG_HOTPLUG_CPU
68 70
69void get_online_cpus(void) 71void get_online_cpus(void)
@@ -291,11 +293,20 @@ int __ref cpu_down(unsigned int cpu)
291 int err = 0; 293 int err = 0;
292 294
293 cpu_maps_update_begin(); 295 cpu_maps_update_begin();
294 if (cpu_hotplug_disabled) 296
297 if (cpu_hotplug_disabled) {
295 err = -EBUSY; 298 err = -EBUSY;
296 else 299 goto out;
297 err = _cpu_down(cpu, 0); 300 }
301
302 cpu_clear(cpu, cpu_active_map);
303
304 err = _cpu_down(cpu, 0);
305
306 if (cpu_online(cpu))
307 cpu_set(cpu, cpu_active_map);
298 308
309out:
299 cpu_maps_update_done(); 310 cpu_maps_update_done();
300 return err; 311 return err;
301} 312}
@@ -355,11 +366,18 @@ int __cpuinit cpu_up(unsigned int cpu)
355 } 366 }
356 367
357 cpu_maps_update_begin(); 368 cpu_maps_update_begin();
358 if (cpu_hotplug_disabled) 369
370 if (cpu_hotplug_disabled) {
359 err = -EBUSY; 371 err = -EBUSY;
360 else 372 goto out;
361 err = _cpu_up(cpu, 0); 373 }
362 374
375 err = _cpu_up(cpu, 0);
376
377 if (cpu_online(cpu))
378 cpu_set(cpu, cpu_active_map);
379
380out:
363 cpu_maps_update_done(); 381 cpu_maps_update_done();
364 return err; 382 return err;
365} 383}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..3c3ef02f65f1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
564 * partition_sched_domains(). 564 * partition_sched_domains().
565 */ 565 */
566 566
567static void rebuild_sched_domains(void) 567void rebuild_sched_domains(void)
568{ 568{
569 struct kfifo *q; /* queue of cpusets to be scanned */ 569 struct kfifo *q; /* queue of cpusets to be scanned */
570 struct cpuset *cp; /* scans q */ 570 struct cpuset *cp; /* scans q */
diff --git a/kernel/sched.c b/kernel/sched.c
index 1ee18dbb4516..c237624a8a04 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2881,7 +2881,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2881 2881
2882 rq = task_rq_lock(p, &flags); 2882 rq = task_rq_lock(p, &flags);
2883 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2883 if (!cpu_isset(dest_cpu, p->cpus_allowed)
2884 || unlikely(cpu_is_offline(dest_cpu))) 2884 || unlikely(!cpu_active(dest_cpu)))
2885 goto out; 2885 goto out;
2886 2886
2887 /* force the process onto the specified CPU */ 2887 /* force the process onto the specified CPU */
@@ -3849,7 +3849,7 @@ int select_nohz_load_balancer(int stop_tick)
3849 /* 3849 /*
3850 * If we are going offline and still the leader, give up! 3850 * If we are going offline and still the leader, give up!
3851 */ 3851 */
3852 if (cpu_is_offline(cpu) && 3852 if (!cpu_active(cpu) &&
3853 atomic_read(&nohz.load_balancer) == cpu) { 3853 atomic_read(&nohz.load_balancer) == cpu) {
3854 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3854 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3855 BUG(); 3855 BUG();
@@ -5876,7 +5876,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5876 struct rq *rq_dest, *rq_src; 5876 struct rq *rq_dest, *rq_src;
5877 int ret = 0, on_rq; 5877 int ret = 0, on_rq;
5878 5878
5879 if (unlikely(cpu_is_offline(dest_cpu))) 5879 if (unlikely(!cpu_active(dest_cpu)))
5880 return ret; 5880 return ret;
5881 5881
5882 rq_src = cpu_rq(src_cpu); 5882 rq_src = cpu_rq(src_cpu);
@@ -7554,18 +7554,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
7554} 7554}
7555 7555
7556/* 7556/*
7557 * Free current domain masks.
7558 * Called after all cpus are attached to NULL domain.
7559 */
7560static void free_sched_domains(void)
7561{
7562 ndoms_cur = 0;
7563 if (doms_cur != &fallback_doms)
7564 kfree(doms_cur);
7565 doms_cur = &fallback_doms;
7566}
7567
7568/*
7569 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7557 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
7570 * For now this just excludes isolated cpus, but could be used to 7558 * For now this just excludes isolated cpus, but could be used to
7571 * exclude other special cases in the future. 7559 * exclude other special cases in the future.
@@ -7643,7 +7631,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7643 * ownership of it and will kfree it when done with it. If the caller 7631 * ownership of it and will kfree it when done with it. If the caller
7644 * failed the kmalloc call, then it can pass in doms_new == NULL, 7632 * failed the kmalloc call, then it can pass in doms_new == NULL,
7645 * and partition_sched_domains() will fallback to the single partition 7633 * and partition_sched_domains() will fallback to the single partition
7646 * 'fallback_doms'. 7634 * 'fallback_doms', it also forces the domains to be rebuilt.
7647 * 7635 *
7648 * Call with hotplug lock held 7636 * Call with hotplug lock held
7649 */ 7637 */
@@ -7657,12 +7645,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7657 /* always unregister in case we don't destroy any domains */ 7645 /* always unregister in case we don't destroy any domains */
7658 unregister_sched_domain_sysctl(); 7646 unregister_sched_domain_sysctl();
7659 7647
7660 if (doms_new == NULL) { 7648 if (doms_new == NULL)
7661 ndoms_new = 1; 7649 ndoms_new = 0;
7662 doms_new = &fallback_doms;
7663 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7664 dattr_new = NULL;
7665 }
7666 7650
7667 /* Destroy deleted domains */ 7651 /* Destroy deleted domains */
7668 for (i = 0; i < ndoms_cur; i++) { 7652 for (i = 0; i < ndoms_cur; i++) {
@@ -7677,6 +7661,14 @@ match1:
7677 ; 7661 ;
7678 } 7662 }
7679 7663
7664 if (doms_new == NULL) {
7665 ndoms_cur = 0;
7666 ndoms_new = 1;
7667 doms_new = &fallback_doms;
7668 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7669 dattr_new = NULL;
7670 }
7671
7680 /* Build new domains */ 7672 /* Build new domains */
7681 for (i = 0; i < ndoms_new; i++) { 7673 for (i = 0; i < ndoms_new; i++) {
7682 for (j = 0; j < ndoms_cur; j++) { 7674 for (j = 0; j < ndoms_cur; j++) {
@@ -7707,17 +7699,10 @@ match2:
7707#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7699#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
7708int arch_reinit_sched_domains(void) 7700int arch_reinit_sched_domains(void)
7709{ 7701{
7710 int err;
7711
7712 get_online_cpus(); 7702 get_online_cpus();
7713 mutex_lock(&sched_domains_mutex); 7703 rebuild_sched_domains();
7714 detach_destroy_domains(&cpu_online_map);
7715 free_sched_domains();
7716 err = arch_init_sched_domains(&cpu_online_map);
7717 mutex_unlock(&sched_domains_mutex);
7718 put_online_cpus(); 7704 put_online_cpus();
7719 7705 return 0;
7720 return err;
7721} 7706}
7722 7707
7723static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 7708static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7783,59 +7768,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7783} 7768}
7784#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ 7769#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7785 7770
7771#ifndef CONFIG_CPUSETS
7786/* 7772/*
7787 * Force a reinitialization of the sched domains hierarchy. The domains 7773 * Add online and remove offline CPUs from the scheduler domains.
7788 * and groups cannot be updated in place without racing with the balancing 7774 * When cpusets are enabled they take over this function.
7789 * code, so we temporarily attach all running cpus to the NULL domain
7790 * which will prevent rebalancing while the sched domains are recalculated.
7791 */ 7775 */
7792static int update_sched_domains(struct notifier_block *nfb, 7776static int update_sched_domains(struct notifier_block *nfb,
7793 unsigned long action, void *hcpu) 7777 unsigned long action, void *hcpu)
7794{ 7778{
7779 switch (action) {
7780 case CPU_ONLINE:
7781 case CPU_ONLINE_FROZEN:
7782 case CPU_DEAD:
7783 case CPU_DEAD_FROZEN:
7784 partition_sched_domains(0, NULL, NULL);
7785 return NOTIFY_OK;
7786
7787 default:
7788 return NOTIFY_DONE;
7789 }
7790}
7791#endif
7792
7793static int update_runtime(struct notifier_block *nfb,
7794 unsigned long action, void *hcpu)
7795{
7795 int cpu = (int)(long)hcpu; 7796 int cpu = (int)(long)hcpu;
7796 7797
7797 switch (action) { 7798 switch (action) {
7798 case CPU_DOWN_PREPARE: 7799 case CPU_DOWN_PREPARE:
7799 case CPU_DOWN_PREPARE_FROZEN: 7800 case CPU_DOWN_PREPARE_FROZEN:
7800 disable_runtime(cpu_rq(cpu)); 7801 disable_runtime(cpu_rq(cpu));
7801 /* fall-through */
7802 case CPU_UP_PREPARE:
7803 case CPU_UP_PREPARE_FROZEN:
7804 detach_destroy_domains(&cpu_online_map);
7805 free_sched_domains();
7806 return NOTIFY_OK; 7802 return NOTIFY_OK;
7807 7803
7808
7809 case CPU_DOWN_FAILED: 7804 case CPU_DOWN_FAILED:
7810 case CPU_DOWN_FAILED_FROZEN: 7805 case CPU_DOWN_FAILED_FROZEN:
7811 case CPU_ONLINE: 7806 case CPU_ONLINE:
7812 case CPU_ONLINE_FROZEN: 7807 case CPU_ONLINE_FROZEN:
7813 enable_runtime(cpu_rq(cpu)); 7808 enable_runtime(cpu_rq(cpu));
7814 /* fall-through */ 7809 return NOTIFY_OK;
7815 case CPU_UP_CANCELED: 7810
7816 case CPU_UP_CANCELED_FROZEN:
7817 case CPU_DEAD:
7818 case CPU_DEAD_FROZEN:
7819 /*
7820 * Fall through and re-initialise the domains.
7821 */
7822 break;
7823 default: 7811 default:
7824 return NOTIFY_DONE; 7812 return NOTIFY_DONE;
7825 } 7813 }
7826
7827#ifndef CONFIG_CPUSETS
7828 /*
7829 * Create default domain partitioning if cpusets are disabled.
7830 * Otherwise we let cpusets rebuild the domains based on the
7831 * current setup.
7832 */
7833
7834 /* The hotplug lock is already held by cpu_up/cpu_down */
7835 arch_init_sched_domains(&cpu_online_map);
7836#endif
7837
7838 return NOTIFY_OK;
7839} 7814}
7840 7815
7841void __init sched_init_smp(void) 7816void __init sched_init_smp(void)
@@ -7855,8 +7830,15 @@ void __init sched_init_smp(void)
7855 cpu_set(smp_processor_id(), non_isolated_cpus); 7830 cpu_set(smp_processor_id(), non_isolated_cpus);
7856 mutex_unlock(&sched_domains_mutex); 7831 mutex_unlock(&sched_domains_mutex);
7857 put_online_cpus(); 7832 put_online_cpus();
7833
7834#ifndef CONFIG_CPUSETS
7858 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7835 /* XXX: Theoretical race here - CPU may be hotplugged now */
7859 hotcpu_notifier(update_sched_domains, 0); 7836 hotcpu_notifier(update_sched_domains, 0);
7837#endif
7838
7839 /* RT runtime code needs to handle some hotplug events */
7840 hotcpu_notifier(update_runtime, 0);
7841
7860 init_hrtick(); 7842 init_hrtick();
7861 7843
7862 /* Move init over to a non-isolated CPU */ 7844 /* Move init over to a non-isolated CPU */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2aa987027d6..d924c679dfac 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1004,6 +1004,8 @@ static void yield_task_fair(struct rq *rq)
1004 * not idle and an idle cpu is available. The span of cpus to 1004 * not idle and an idle cpu is available. The span of cpus to
1005 * search starts with cpus closest then further out as needed, 1005 * search starts with cpus closest then further out as needed,
1006 * so we always favor a closer, idle cpu. 1006 * so we always favor a closer, idle cpu.
1007 * Domains may include CPUs that are not usable for migration,
1008 * hence we need to mask them out (cpu_active_map)
1007 * 1009 *
1008 * Returns the CPU we should wake onto. 1010 * Returns the CPU we should wake onto.
1009 */ 1011 */
@@ -1031,6 +1033,7 @@ static int wake_idle(int cpu, struct task_struct *p)
1031 || ((sd->flags & SD_WAKE_IDLE_FAR) 1033 || ((sd->flags & SD_WAKE_IDLE_FAR)
1032 && !task_hot(p, task_rq(p)->clock, sd))) { 1034 && !task_hot(p, task_rq(p)->clock, sd))) {
1033 cpus_and(tmp, sd->span, p->cpus_allowed); 1035 cpus_and(tmp, sd->span, p->cpus_allowed);
1036 cpus_and(tmp, tmp, cpu_active_map);
1034 for_each_cpu_mask(i, tmp) { 1037 for_each_cpu_mask(i, tmp) {
1035 if (idle_cpu(i)) { 1038 if (idle_cpu(i)) {
1036 if (i != task_cpu(p)) { 1039 if (i != task_cpu(p)) {
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d3d1cccb3d7b..50735bb96149 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -934,6 +934,13 @@ static int find_lowest_rq(struct task_struct *task)
934 return -1; /* No targets found */ 934 return -1; /* No targets found */
935 935
936 /* 936 /*
937 * Only consider CPUs that are usable for migration.
938 * I guess we might want to change cpupri_find() to ignore those
939 * in the first place.
940 */
941 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
942
943 /*
937 * At this point we have built a mask of cpus representing the 944 * At this point we have built a mask of cpus representing the
938 * lowest priority tasks in the system. Now we want to elect 945 * lowest priority tasks in the system. Now we want to elect
939 * the best one based on our affinity and topology. 946 * the best one based on our affinity and topology.