-rw-r--r--   include/linux/cpumask.h |   6
-rw-r--r--   include/linux/cpuset.h  |   7
-rw-r--r--   include/linux/sched.h   |  11
-rw-r--r--   init/main.c             |   7
-rw-r--r--   kernel/cpu.c            |  40
-rw-r--r--   kernel/cpuset.c         |   2
-rw-r--r--   kernel/sched.c          | 108
-rw-r--r--   kernel/sched_fair.c     |   3
-rw-r--r--   kernel/sched_rt.c       |  75
9 files changed, 159 insertions(+), 100 deletions(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c24875bd9c5b..d614d2472798 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -359,13 +359,14 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 
 /*
  * The following particular system cpumasks and operations manage
- * possible, present and online cpus.  Each of them is a fixed size
+ * possible, present, active and online cpus.  Each of them is a fixed size
  * bitmap of size NR_CPUS.
  *
  *  #ifdef CONFIG_HOTPLUG_CPU
  *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
  *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
  *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
+ *     cpu_active_map   - has bit 'cpu' set iff cpu available to migration
  *  #else
  *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
  *     cpu_present_map  - copy of cpu_possible_map
@@ -416,6 +417,7 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 extern cpumask_t cpu_possible_map;
 extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_present_map;
+extern cpumask_t cpu_active_map;
 
 #if NR_CPUS > 1
 #define num_online_cpus()	cpus_weight(cpu_online_map)
@@ -424,6 +426,7 @@ extern cpumask_t cpu_present_map;
 #define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
 #define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
 #define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
+#define cpu_active(cpu)		cpu_isset((cpu), cpu_active_map)
 #else
 #define num_online_cpus()	1
 #define num_possible_cpus()	1
@@ -431,6 +434,7 @@ extern cpumask_t cpu_present_map;
 #define cpu_online(cpu)		((cpu) == 0)
 #define cpu_possible(cpu)	((cpu) == 0)
 #define cpu_present(cpu)	((cpu) == 0)
+#define cpu_active(cpu)		((cpu) == 0)
 #endif
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
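For context, here is a minimal sketch, not part of the patch, of how the new cpu_active() test is meant to be used on the migration path: targets are picked from cpu_active_map rather than cpu_online_map, so a CPU that is still online but already on its way down is skipped. The helper name pick_migration_target() is hypothetical.

#include <linux/cpumask.h>

/* Illustration only, not part of this patch: choose a migration target. */
static int pick_migration_target(const cpumask_t *candidates)
{
	int cpu;

	for_each_cpu_mask(cpu, *candidates) {
		/* cpu_online() is not enough: the cpu may be going down */
		if (cpu_active(cpu))
			return cpu;
	}

	return -1;	/* no usable target */
}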
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 038578362b47..e8f450c499b0 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(void);
 
 extern int current_cpuset_is_being_rebound(void);
 
+extern void rebuild_sched_domains(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
@@ -156,6 +158,11 @@ static inline int current_cpuset_is_being_rebound(void)
 	return 0;
 }
 
+static inline void rebuild_sched_domains(void)
+{
+	partition_sched_domains(0, NULL, NULL);
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
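As a hedged usage sketch (not part of the patch): with CONFIG_CPUSETS the exported rebuild_sched_domains() from kernel/cpuset.c is used, and without it the inline fallback above simply rebuilds the single default partition via partition_sched_domains(0, NULL, NULL). A hypothetical caller only has to hold the hotplug lock, mirroring what arch_reinit_sched_domains() does later in this patch.

#include <linux/cpuset.h>
#include <linux/cpu.h>

/* Hypothetical caller: force a sched-domain rebuild after a topology change. */
static void example_retopology(void)
{
	get_online_cpus();		/* hotplug lock must be held */
	rebuild_sched_domains();	/* cpuset version or the inline fallback */
	put_online_cpus();
}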
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1941d8b5cf11..26da921530fe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -824,7 +824,16 @@ extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
-#endif	/* CONFIG_SMP */
+#else /* CONFIG_SMP */
+
+struct sched_domain_attr;
+
+static inline void
+partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+			struct sched_domain_attr *dattr_new)
+{
+}
+#endif	/* !CONFIG_SMP */
 
 struct io_context;			/* See blkdev.h */
 #define NGROUPS_SMALL		32
diff --git a/init/main.c b/init/main.c
index edeace036fd9..dd25259530ea 100644
--- a/init/main.c
+++ b/init/main.c
@@ -415,6 +415,13 @@ static void __init smp_init(void)
 {
 	unsigned int cpu;
 
+	/*
+	 * Set up the current CPU as possible to migrate to.
+	 * The other ones will be done by cpu_up/cpu_down()
+	 */
+	cpu = smp_processor_id();
+	cpu_set(cpu, cpu_active_map);
+
 	/* FIXME: This should be done in userspace --RR */
 	for_each_present_cpu(cpu) {
 		if (num_online_cpus() >= setup_max_cpus)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab801..033603c1d7c3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
 	cpu_hotplug.refcount = 0;
 }
 
+cpumask_t cpu_active_map;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
 	int err = 0;
 
 	cpu_maps_update_begin();
-	if (cpu_hotplug_disabled)
+
+	if (cpu_hotplug_disabled) {
 		err = -EBUSY;
-	else
-		err = _cpu_down(cpu, 0);
+		goto out;
+	}
+
+	cpu_clear(cpu, cpu_active_map);
+
+	/*
+	 * Make sure all cpus did the reschedule and are not
+	 * using a stale version of the cpu_active_map.
+	 * This is not strictly necessary because stop_machine()
+	 * that we run down the line already provides the required
+	 * synchronization. But it's really a side effect and we do not
+	 * want to depend on the innards of the stop_machine here.
+	 */
+	synchronize_sched();
+
+	err = _cpu_down(cpu, 0);
 
+	if (cpu_online(cpu))
+		cpu_set(cpu, cpu_active_map);
+
+out:
 	cpu_maps_update_done();
 	return err;
 }
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
 	}
 
 	cpu_maps_update_begin();
-	if (cpu_hotplug_disabled)
+
+	if (cpu_hotplug_disabled) {
 		err = -EBUSY;
-	else
-		err = _cpu_up(cpu, 0);
+		goto out;
+	}
+
+	err = _cpu_up(cpu, 0);
 
+	if (cpu_online(cpu))
+		cpu_set(cpu, cpu_active_map);
+
+out:
 	cpu_maps_update_done();
 	return err;
 }
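To make the ordering in cpu_down() above concrete, the following sketch (assumptions stated in the comments, not part of the patch) shows why clearing the bit before synchronize_sched() is sufficient for readers that sample cpu_active_map from a preemption-disabled region.

#include <linux/cpumask.h>
#include <linux/preempt.h>

/*
 * Illustration only: a reader that checks migration targets inside a
 * preempt-disabled (RCU-sched read-side) section.
 */
static int cpu_still_usable(int cpu)
{
	int usable;

	preempt_disable();
	/*
	 * Either this section started before cpu_down() cleared the bit,
	 * in which case its synchronize_sched() waits for us to finish,
	 * or it started afterwards and we already observe the bit clear.
	 */
	usable = cpu_active(cpu);
	preempt_enable();

	return usable;
}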
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..3c3ef02f65f1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
 	struct kfifo *q;	/* queue of cpusets to be scanned */
 	struct cpuset *cp;	/* scans q */
diff --git a/kernel/sched.c b/kernel/sched.c
index c13c75e9f9f7..85cf246cfdf5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2802,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -3770,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
 		/*
 		 * If we are going offline and still the leader, give up!
 		 */
-		if (cpu_is_offline(cpu) &&
+		if (!cpu_active(cpu) &&
 		    atomic_read(&nohz.load_balancer) == cpu) {
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
@@ -5794,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	struct rq *rq_dest, *rq_src;
 	int ret = 0, on_rq;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(!cpu_active(dest_cpu)))
 		return ret;
 
 	rq_src = cpu_rq(src_cpu);
@@ -7472,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 }
 
 /*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
-	ndoms_cur = 0;
-	if (doms_cur != &fallback_doms)
-		kfree(doms_cur);
-	doms_cur = &fallback_doms;
-}
-
-/*
  * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
@@ -7561,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * ownership of it and will kfree it when done with it. If the caller
  * failed the kmalloc call, then it can pass in doms_new == NULL,
  * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms'; it also forces the domains to be rebuilt.
 *
 * Call with hotplug lock held
 */
@@ -7575,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
-	if (doms_new == NULL) {
-		ndoms_new = 1;
-		doms_new = &fallback_doms;
-		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
-	}
+	if (doms_new == NULL)
+		ndoms_new = 0;
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
@@ -7595,6 +7579,14 @@ match1:
 		;
 	}
 
+	if (doms_new == NULL) {
+		ndoms_cur = 0;
+		ndoms_new = 1;
+		doms_new = &fallback_doms;
+		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		dattr_new = NULL;
+	}
+
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
@@ -7625,17 +7617,10 @@ match2:
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 int arch_reinit_sched_domains(void)
 {
-	int err;
-
 	get_online_cpus();
-	mutex_lock(&sched_domains_mutex);
-	detach_destroy_domains(&cpu_online_map);
-	free_sched_domains();
-	err = arch_init_sched_domains(&cpu_online_map);
-	mutex_unlock(&sched_domains_mutex);
+	rebuild_sched_domains();
 	put_online_cpus();
-
-	return err;
+	return 0;
 }
 
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7701,59 +7686,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
+#ifndef CONFIG_CPUSETS
 /*
- * Force a reinitialization of the sched domains hierarchy. The domains
- * and groups cannot be updated in place without racing with the balancing
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
+ * Add online and remove offline CPUs from the scheduler domains.
+ * When cpusets are enabled they take over this function.
  */
 static int update_sched_domains(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
 {
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		partition_sched_domains(0, NULL, NULL);
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+#endif
+
+static int update_runtime(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
 	int cpu = (int)(long)hcpu;
 
 	switch (action) {
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		disable_runtime(cpu_rq(cpu));
-		/* fall-through */
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		detach_destroy_domains(&cpu_online_map);
-		free_sched_domains();
 		return NOTIFY_OK;
 
-
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		enable_runtime(cpu_rq(cpu));
-		/* fall-through */
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/*
-		 * Fall through and re-initialise the domains.
-		 */
-		break;
+		return NOTIFY_OK;
+
 	default:
 		return NOTIFY_DONE;
 	}
-
-#ifndef CONFIG_CPUSETS
-	/*
-	 * Create default domain partitioning if cpusets are disabled.
-	 * Otherwise we let cpusets rebuild the domains based on the
-	 * current setup.
-	 */
-
-	/* The hotplug lock is already held by cpu_up/cpu_down */
-	arch_init_sched_domains(&cpu_online_map);
-#endif
-
-	return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
@@ -7773,8 +7748,15 @@ void __init sched_init_smp(void)
 	cpu_set(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
+
+#ifndef CONFIG_CPUSETS
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
+#endif
+
+	/* RT runtime code needs to handle some hotplug events */
+	hotcpu_notifier(update_runtime, 0);
+
 	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */
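A hedged sketch of the new calling convention for partition_sched_domains() (the function example_two_partitions() and its error handling are hypothetical, not from the kernel): a caller hands over ownership of a kmalloc()ed domain array, or passes doms_new == NULL to force a rebuild of the single default partition, which is exactly what the new update_sched_domains() notifier does with (0, NULL, NULL).

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/errno.h>

/* Hypothetical caller: split the machine into two scheduling partitions. */
static int example_two_partitions(cpumask_t a, cpumask_t b)
{
	cpumask_t *doms = kmalloc(2 * sizeof(cpumask_t), GFP_KERNEL);

	if (!doms) {
		/* NULL forces the single default partition to be rebuilt */
		get_online_cpus();
		partition_sched_domains(0, NULL, NULL);
		put_online_cpus();
		return -ENOMEM;
	}

	doms[0] = a;
	doms[1] = b;

	get_online_cpus();			/* call with hotplug lock held */
	partition_sched_domains(2, doms, NULL);	/* takes ownership of doms */
	put_online_cpus();

	return 0;
}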
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6893b3ed65fe..7f700263f04c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1003,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
  * not idle and an idle cpu is available.  The span of cpus to
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
+ * Domains may include CPUs that are not usable for migration,
+ * hence we need to mask them out (cpu_active_map)
  *
  * Returns the CPU we should wake onto.
  */
@@ -1030,6 +1032,7 @@ static int wake_idle(int cpu, struct task_struct *p)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
 			cpus_and(tmp, sd->span, p->cpus_allowed);
+			cpus_and(tmp, tmp, cpu_active_map);
 			for_each_cpu_mask(i, tmp) {
 				if (idle_cpu(i)) {
 					if (i != task_cpu(p)) {
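The wake_idle() change above boils down to one extra mask operation; as an isolated, hypothetical helper (the names are not from the kernel) the pattern looks like this:

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Illustration only: restrict wake-up candidates to usable CPUs. */
static void mask_usable_cpus(cpumask_t *candidates, const cpumask_t *span,
			     const struct task_struct *p)
{
	cpus_and(*candidates, *span, p->cpus_allowed);
	/* a domain's span may still contain CPUs that are going down */
	cpus_and(*candidates, *candidates, cpu_active_map);
}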
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 147004c651c0..24621cea8bb0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -601,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	if (rt_se->nr_cpus_allowed == 1)
-		list_add(&rt_se->run_list, queue);
-	else
-		list_add_tail(&rt_se->run_list, queue);
-
+	list_add_tail(&rt_se->run_list, queue);
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -690,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-	struct rt_prio_array *array = &rt_rq->active;
-
 	if (on_rt_rq(rt_se)) {
-		list_del_init(&rt_se->run_list);
-		list_add_tail(&rt_se->run_list,
-			      array->queue + rt_se_prio(rt_se));
+		struct rt_prio_array *array = &rt_rq->active;
+		struct list_head *queue = array->queue + rt_se_prio(rt_se);
+
+		if (head)
+			list_move(&rt_se->run_list, queue);
+		else
+			list_move_tail(&rt_se->run_list, queue);
 	}
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 	struct rt_rq *rt_rq;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
-		requeue_rt_entity(rt_rq, rt_se);
+		requeue_rt_entity(rt_rq, rt_se, head);
 	}
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-	requeue_task_rt(rq, rq->curr);
+	requeue_task_rt(rq, rq->curr, 0);
 }
 
 #ifdef CONFIG_SMP
@@ -755,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 	 */
 	return task_cpu(p);
 }
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+	cpumask_t mask;
+
+	if (rq->curr->rt.nr_cpus_allowed == 1)
+		return;
+
+	if (p->rt.nr_cpus_allowed != 1
+	    && cpupri_find(&rq->rd->cpupri, p, &mask))
+		return;
+
+	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+		return;
+
+	/*
+	 * There appears to be other cpus that can accept
+	 * current and none to run 'p', so let's reschedule
+	 * to try and push current away:
+	 */
+	requeue_task_rt(rq, p, 1);
+	resched_task(rq->curr);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -780,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 	 * to move current somewhere else, making room for our non-migratable
 	 * task.
 	 */
-	if((p->prio == rq->curr->prio)
-	   && p->rt.nr_cpus_allowed == 1
-	   && rq->curr->rt.nr_cpus_allowed != 1) {
-		cpumask_t mask;
-
-		if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-			/*
-			 * There appears to be other cpus that can accept
-			 * current, so lets reschedule to try and push it away
-			 */
-			resched_task(rq->curr);
-	}
+	if (p->prio == rq->curr->prio && !need_resched())
+		check_preempt_equal_prio(rq, p);
 #endif
 }
 
@@ -924,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
 		return -1; /* No targets found */
 
 	/*
+	 * Only consider CPUs that are usable for migration.
+	 * I guess we might want to change cpupri_find() to ignore those
+	 * in the first place.
+	 */
+	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+
+	/*
 	 * At this point we have built a mask of cpus representing the
 	 * lowest priority tasks in the system. Now we want to elect
 	 * the best one based on our affinity and topology.
@@ -1417,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	 * on the queue:
 	 */
 	if (p->rt.run_list.prev != p->rt.run_list.next) {
-		requeue_task_rt(rq, p);
+		requeue_task_rt(rq, p, 0);
 		set_tsk_need_resched(p);
 	}
 }
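For the requeue_rt_entity() change earlier in this file, the head argument only selects which end of the per-priority queue the entity moves to; a stand-alone illustration (not kernel code) of the two list primitives involved:

#include <linux/list.h>

/*
 * Illustration only: head == 1 puts the entry at the front of its queue
 * (it runs next, as check_preempt_equal_prio() wants for 'p'), head == 0
 * puts it at the back (round-robin/yield behaviour).
 */
static void example_requeue(struct list_head *entry,
			    struct list_head *queue, int head)
{
	if (head)
		list_move(entry, queue);
	else
		list_move_tail(entry, queue);
}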
