Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/cpu.c          40
 -rw-r--r--  kernel/cpuset.c        2
 -rw-r--r--  kernel/sched.c       108
 -rw-r--r--  kernel/sched_fair.c    3
 -rw-r--r--  kernel/sched_rt.c     75
 5 files changed, 130 insertions(+), 98 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab801..033603c1d7c3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
         cpu_hotplug.refcount = 0;
 }
 
+cpumask_t cpu_active_map;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
         int err = 0;
 
         cpu_maps_update_begin();
-        if (cpu_hotplug_disabled)
+
+        if (cpu_hotplug_disabled) {
                 err = -EBUSY;
-        else
-                err = _cpu_down(cpu, 0);
+                goto out;
+        }
+
+        cpu_clear(cpu, cpu_active_map);
+
+        /*
+         * Make sure all the cpus did the reschedule and are not
+         * using a stale version of the cpu_active_map.
+         * This is not strictly necessary because the stop_machine()
+         * we run down the line already provides the required
+         * synchronization. But it's really a side effect and we do not
+         * want to depend on the innards of stop_machine here.
+         */
+        synchronize_sched();
+
+        err = _cpu_down(cpu, 0);
 
+        if (cpu_online(cpu))
+                cpu_set(cpu, cpu_active_map);
+
+out:
         cpu_maps_update_done();
         return err;
 }
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
         }
 
         cpu_maps_update_begin();
-        if (cpu_hotplug_disabled)
+
+        if (cpu_hotplug_disabled) {
                 err = -EBUSY;
-        else
-                err = _cpu_up(cpu, 0);
+                goto out;
+        }
+
+        err = _cpu_up(cpu, 0);
 
+        if (cpu_online(cpu))
+                cpu_set(cpu, cpu_active_map);
+
+out:
         cpu_maps_update_done();
         return err;
 }
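
Note: the cpu.c changes hinge on a handful of cpumask accessors and on ordering: clear the bit in cpu_active_map, let every CPU observe it (synchronize_sched()), and only then take the CPU down. Below is a minimal userspace sketch of that ordering, with a 64-bit integer standing in for the kernel's cpumask_t; the helper names mirror the kernel's, but the maps, values, and printouts are purely illustrative.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask;                  /* stand-in for cpumask_t */

static void cpu_set(int cpu, cpumask *m)   { *m |=  (UINT64_C(1) << cpu); }
static void cpu_clear(int cpu, cpumask *m) { *m &= ~(UINT64_C(1) << cpu); }
static int  cpu_isset(int cpu, cpumask m)  { return (int)((m >> cpu) & 1); }

int main(void)
{
        cpumask cpu_online_map = 0xf;      /* CPUs 0-3 online */
        cpumask cpu_active_map = 0xf;      /* and all of them active */

        /* cpu_down(2): first drop CPU 2 from the active map ... */
        cpu_clear(2, &cpu_active_map);
        /* ... synchronize_sched() would run here, so every CPU sees
         * the cleared bit before the teardown starts ... */
        cpu_clear(2, &cpu_online_map);     /* ... then take it offline */

        /* migration sites now test "active" rather than "online" */
        printf("cpu 2 active: %d\n", cpu_isset(2, cpu_active_map));
        printf("candidates:   %#llx\n",
               (unsigned long long)(cpu_online_map & cpu_active_map));

        /* CPU_DOWN_FAILED path: the CPU stayed up, mark it active again */
        cpu_set(2, &cpu_active_map);
        return 0;
}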
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..3c3ef02f65f1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
         struct kfifo *q;        /* queue of cpusets to be scanned */
         struct cpuset *cp;      /* scans q */
diff --git a/kernel/sched.c b/kernel/sched.c
index c13c75e9f9f7..85cf246cfdf5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2802,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
         rq = task_rq_lock(p, &flags);
         if (!cpu_isset(dest_cpu, p->cpus_allowed)
-            || unlikely(cpu_is_offline(dest_cpu)))
+            || unlikely(!cpu_active(dest_cpu)))
                 goto out;
 
         /* force the process onto the specified CPU */
@@ -3770,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
                 /*
                  * If we are going offline and still the leader, give up!
                  */
-                if (cpu_is_offline(cpu) &&
+                if (!cpu_active(cpu) &&
                     atomic_read(&nohz.load_balancer) == cpu) {
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                 BUG();
@@ -5794,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
         struct rq *rq_dest, *rq_src;
         int ret = 0, on_rq;
 
-        if (unlikely(cpu_is_offline(dest_cpu)))
+        if (unlikely(!cpu_active(dest_cpu)))
                 return ret;
 
         rq_src = cpu_rq(src_cpu);
@@ -7472,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 }
 
 /*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
-        ndoms_cur = 0;
-        if (doms_cur != &fallback_doms)
-                kfree(doms_cur);
-        doms_cur = &fallback_doms;
-}
-
-/*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
@@ -7561,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * ownership of it and will kfree it when done with it. If the caller
  * failed the kmalloc call, then it can pass in doms_new == NULL,
  * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms'; this also forces the domains to be rebuilt.
  *
  * Call with hotplug lock held
  */
@@ -7575,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
         /* always unregister in case we don't destroy any domains */
         unregister_sched_domain_sysctl();
 
-        if (doms_new == NULL) {
-                ndoms_new = 1;
-                doms_new = &fallback_doms;
-                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-                dattr_new = NULL;
-        }
+        if (doms_new == NULL)
+                ndoms_new = 0;
 
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
@@ -7595,6 +7579,14 @@ match1:
                 ;
         }
 
+        if (doms_new == NULL) {
+                ndoms_cur = 0;
+                ndoms_new = 1;
+                doms_new = &fallback_doms;
+                cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+                dattr_new = NULL;
+        }
+
         /* Build new domains */
         for (i = 0; i < ndoms_new; i++) {
                 for (j = 0; j < ndoms_cur; j++) {
@@ -7625,17 +7617,10 @@ match2:
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 int arch_reinit_sched_domains(void)
 {
-        int err;
-
         get_online_cpus();
-        mutex_lock(&sched_domains_mutex);
-        detach_destroy_domains(&cpu_online_map);
-        free_sched_domains();
-        err = arch_init_sched_domains(&cpu_online_map);
-        mutex_unlock(&sched_domains_mutex);
+        rebuild_sched_domains();
         put_online_cpus();
-
-        return err;
+        return 0;
 }
 
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7701,59 +7686,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
+#ifndef CONFIG_CPUSETS
 /*
- * Force a reinitialization of the sched domains hierarchy. The domains
- * and groups cannot be updated in place without racing with the balancing
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
+ * Add online and remove offline CPUs from the scheduler domains.
+ * When cpusets are enabled they take over this function.
  */
 static int update_sched_domains(struct notifier_block *nfb,
                                 unsigned long action, void *hcpu)
 {
+        switch (action) {
+        case CPU_ONLINE:
+        case CPU_ONLINE_FROZEN:
+        case CPU_DEAD:
+        case CPU_DEAD_FROZEN:
+                partition_sched_domains(0, NULL, NULL);
+                return NOTIFY_OK;
+
+        default:
+                return NOTIFY_DONE;
+        }
+}
+#endif
+
+static int update_runtime(struct notifier_block *nfb,
+                                unsigned long action, void *hcpu)
+{
         int cpu = (int)(long)hcpu;
 
         switch (action) {
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
                 disable_runtime(cpu_rq(cpu));
-                /* fall-through */
-        case CPU_UP_PREPARE:
-        case CPU_UP_PREPARE_FROZEN:
-                detach_destroy_domains(&cpu_online_map);
-                free_sched_domains();
                 return NOTIFY_OK;
 
-
         case CPU_DOWN_FAILED:
         case CPU_DOWN_FAILED_FROZEN:
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
                 enable_runtime(cpu_rq(cpu));
-                /* fall-through */
-        case CPU_UP_CANCELED:
-        case CPU_UP_CANCELED_FROZEN:
-        case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
-                /*
-                 * Fall through and re-initialise the domains.
-                 */
-                break;
+                return NOTIFY_OK;
+
         default:
                 return NOTIFY_DONE;
         }
-
-#ifndef CONFIG_CPUSETS
-        /*
-         * Create default domain partitioning if cpusets are disabled.
-         * Otherwise we let cpusets rebuild the domains based on the
-         * current setup.
-         */
-
-        /* The hotplug lock is already held by cpu_up/cpu_down */
-        arch_init_sched_domains(&cpu_online_map);
-#endif
-
-        return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
@@ -7773,8 +7748,15 @@ void __init sched_init_smp(void)
         cpu_set(smp_processor_id(), non_isolated_cpus);
         mutex_unlock(&sched_domains_mutex);
         put_online_cpus();
+
+#ifndef CONFIG_CPUSETS
         /* XXX: Theoretical race here - CPU may be hotplugged now */
         hotcpu_notifier(update_sched_domains, 0);
+#endif
+
+        /* RT runtime code needs to handle some hotplug events */
+        hotcpu_notifier(update_runtime, 0);
+
         init_hrtick();
 
         /* Move init over to a non-isolated CPU */
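
Note: the sched.c rework splits one overloaded hotplug notifier into two: update_sched_domains() now only repartitions domains once a CPU is fully ONLINE or DEAD (and only when cpusets are compiled out), while the new update_runtime() toggles RT runtime around DOWN_PREPARE / DOWN_FAILED / ONLINE. A compilable sketch of that dispatch shape follows; the enum values and the stubbed puts() calls are stand-ins for the kernel's CPU_* constants and the real disable_runtime()/enable_runtime()/partition_sched_domains() calls.

#include <stdio.h>

enum { CPU_ONLINE, CPU_DEAD, CPU_DOWN_PREPARE, CPU_DOWN_FAILED };
enum { NOTIFY_DONE, NOTIFY_OK };

/* domain notifier: rebuild only once a CPU is fully up or fully gone */
static int update_sched_domains(unsigned long action)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_DEAD:
                puts("partition_sched_domains(0, NULL, NULL)");  /* stub */
                return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
}

/* runtime notifier: disable RT runtime before a CPU leaves,
 * re-enable it when the CPU comes (back) up */
static int update_runtime(unsigned long action)
{
        switch (action) {
        case CPU_DOWN_PREPARE:
                puts("disable_runtime(rq)");                     /* stub */
                return NOTIFY_OK;
        case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                puts("enable_runtime(rq)");                      /* stub */
                return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
}

int main(void)
{
        /* a failed offline: DOWN_PREPARE, then DOWN_FAILED */
        unsigned long events[] = { CPU_DOWN_PREPARE, CPU_DOWN_FAILED };
        for (unsigned i = 0; i < 2; i++) {
                update_sched_domains(events[i]);  /* NOTIFY_DONE for both */
                update_runtime(events[i]);        /* disable, then enable */
        }
        return 0;
}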
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6893b3ed65fe..7f700263f04c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1003,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
  * not idle and an idle cpu is available. The span of cpus to
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
+ * Domains may include CPUs that are not usable for migration,
+ * hence we need to mask them out (cpu_active_map)
  *
  * Returns the CPU we should wake onto.
  */
@@ -1030,6 +1032,7 @@ static int wake_idle(int cpu, struct task_struct *p)
                     || ((sd->flags & SD_WAKE_IDLE_FAR)
                         && !task_hot(p, task_rq(p)->clock, sd))) {
                         cpus_and(tmp, sd->span, p->cpus_allowed);
+                        cpus_and(tmp, tmp, cpu_active_map);
                         for_each_cpu_mask(i, tmp) {
                                 if (idle_cpu(i)) {
                                         if (i != task_cpu(p)) {
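
Note: on a fixed-width mask, the pattern the wake_idle() hunk adds (intersect the domain span with the task's affinity and with cpu_active_map, then walk the set bits as for_each_cpu_mask() does) reduces to a few lines. A sketch with a 64-bit mask and made-up input values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t span    = 0xff;  /* sd->span: CPUs 0-7 */
        uint64_t allowed = 0x3c;  /* p->cpus_allowed: CPUs 2-5 */
        uint64_t active  = 0xfb;  /* cpu_active_map: CPU 2 going down */

        /* cpus_and(tmp, sd->span, p->cpus_allowed);
         * cpus_and(tmp, tmp, cpu_active_map); */
        uint64_t tmp = span & allowed & active;

        /* for_each_cpu_mask(i, tmp) */
        while (tmp) {
                int i = __builtin_ctzll(tmp);     /* lowest set bit */
                tmp &= tmp - 1;                   /* clear it */
                printf("candidate cpu %d\n", i);  /* prints 3, 4, 5 */
        }
        return 0;
}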
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 147004c651c0..24621cea8bb0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -601,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                 return;
 
-        if (rt_se->nr_cpus_allowed == 1)
-                list_add(&rt_se->run_list, queue);
-        else
-                list_add_tail(&rt_se->run_list, queue);
-
+        list_add_tail(&rt_se->run_list, queue);
         __set_bit(rt_se_prio(rt_se), array->bitmap);
 
         inc_rt_tasks(rt_se, rt_rq);
@@ -690,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-        struct rt_prio_array *array = &rt_rq->active;
-
         if (on_rt_rq(rt_se)) {
-                list_del_init(&rt_se->run_list);
-                list_add_tail(&rt_se->run_list,
-                              array->queue + rt_se_prio(rt_se));
+                struct rt_prio_array *array = &rt_rq->active;
+                struct list_head *queue = array->queue + rt_se_prio(rt_se);
+
+                if (head)
+                        list_move(&rt_se->run_list, queue);
+                else
+                        list_move_tail(&rt_se->run_list, queue);
         }
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
         struct sched_rt_entity *rt_se = &p->rt;
         struct rt_rq *rt_rq;
 
         for_each_sched_rt_entity(rt_se) {
                 rt_rq = rt_rq_of_se(rt_se);
-                requeue_rt_entity(rt_rq, rt_se);
+                requeue_rt_entity(rt_rq, rt_se, head);
         }
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-        requeue_task_rt(rq, rq->curr);
+        requeue_task_rt(rq, rq->curr, 0);
 }
 
 #ifdef CONFIG_SMP
@@ -755,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
          */
         return task_cpu(p);
 }
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+        cpumask_t mask;
+
+        if (rq->curr->rt.nr_cpus_allowed == 1)
+                return;
+
+        if (p->rt.nr_cpus_allowed != 1
+            && cpupri_find(&rq->rd->cpupri, p, &mask))
+                return;
+
+        if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+                return;
+
+        /*
+         * There appear to be other cpus that can accept
+         * current and none to run 'p', so let's reschedule
+         * to try and push current away:
+         */
+        requeue_task_rt(rq, p, 1);
+        resched_task(rq->curr);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -780,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
          * to move current somewhere else, making room for our non-migratable
          * task.
          */
-        if((p->prio == rq->curr->prio)
-           && p->rt.nr_cpus_allowed == 1
-           && rq->curr->rt.nr_cpus_allowed != 1) {
-                cpumask_t mask;
-
-                if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-                        /*
-                         * There appears to be other cpus that can accept
-                         * current, so lets reschedule to try and push it away
-                         */
-                        resched_task(rq->curr);
-        }
+        if (p->prio == rq->curr->prio && !need_resched())
+                check_preempt_equal_prio(rq, p);
 #endif
 }
 
@@ -924,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
                 return -1; /* No targets found */
 
         /*
+         * Only consider CPUs that are usable for migration.
+         * I guess we might want to change cpupri_find() to ignore those
+         * in the first place.
+         */
+        cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+
+        /*
          * At this point we have built a mask of cpus representing the
          * lowest priority tasks in the system. Now we want to elect
          * the best one based on our affinity and topology.
@@ -1417,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
          * on the queue:
          */
         if (p->rt.run_list.prev != p->rt.run_list.next) {
-                requeue_task_rt(rq, p);
+                requeue_task_rt(rq, p, 0);
                 set_tsk_need_resched(p);
         }
 }
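
Note: the new head argument to requeue_rt_entity() comes down to list_move() versus list_move_tail(): a head requeue puts the entity first in its priority queue (it runs next, which is what check_preempt_equal_prio() wants), a tail requeue puts it last (the yield path). The sketch below reimplements just the list helpers involved so it stays self-contained; the kernel's list.h versions behave the same way.

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void __list_add(struct list_head *n,
                       struct list_head *prev, struct list_head *next)
{
        next->prev = n; n->next = next; n->prev = prev; prev->next = n;
}
static void list_add(struct list_head *n, struct list_head *h)
{
        __list_add(n, h, h->next);             /* insert right after head */
}
static void list_add_tail(struct list_head *n, struct list_head *h)
{
        __list_add(n, h->prev, h);             /* insert right before head */
}
static void list_del(struct list_head *e)
{
        e->prev->next = e->next; e->next->prev = e->prev;
}
static void list_move(struct list_head *e, struct list_head *h)
{
        list_del(e); list_add(e, h);           /* requeue at the front */
}
static void list_move_tail(struct list_head *e, struct list_head *h)
{
        list_del(e); list_add_tail(e, h);      /* requeue at the back */
}

/* run_list is the first member, so a cast recovers the entity */
struct entity { struct list_head run_list; const char *name; };

static void requeue(struct entity *e, struct list_head *queue, int head)
{
        if (head)
                list_move(&e->run_list, queue);      /* run next */
        else
                list_move_tail(&e->run_list, queue); /* run last */
}

int main(void)
{
        struct list_head queue;
        struct entity a = { .name = "A" }, b = { .name = "B" };

        INIT_LIST_HEAD(&queue);
        list_add_tail(&a.run_list, &queue);
        list_add_tail(&b.run_list, &queue);       /* queue: A B */

        requeue(&b, &queue, 1);                   /* head:  B A */
        printf("front: %s\n", ((struct entity *)queue.next)->name);

        requeue(&b, &queue, 0);                   /* tail:  A B */
        printf("front: %s\n", ((struct entity *)queue.next)->name);
        return 0;
}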