author		Ingo Molnar <mingo@kernel.org>	2016-05-12 03:51:36 -0400
committer	Ingo Molnar <mingo@kernel.org>	2016-05-12 03:51:36 -0400
commit		4eb867651721228ee2eeae142c53378375303e8b (patch)
tree		43ebd9894ff53a7b2d0244293f226cbbfc80d882 /kernel/sched
parent		eb60b3e5e8dfdd590e586a6fc22daf2f63a7b7e6 (diff)
parent		e5ef27d0f5acf9f1db2882d7546a41c021f66820 (diff)
Merge branch 'smp/hotplug' into sched/core, to resolve conflicts
Conflicts:
kernel/sched/core.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	399
-rw-r--r--	kernel/sched/fair.c	15
-rw-r--r--	kernel/sched/sched.h	4
3 files changed, 180 insertions, 238 deletions
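
Context for the diff below: this merge removes the scheduler's CPU notifier callbacks (migration_call(), sched_cpu_active()/sched_cpu_inactive(), sched_domains_numa_masks_update(), sched_ilb_notifier(), hotplug_hrtick()) and replaces them with hotplug state machine entry points (sched_cpu_starting(), sched_cpu_activate(), sched_cpu_deactivate(), sched_cpu_dying()) that take an unsigned int cpu and return an int. Those core callbacks are wired up outside kernel/sched (this diffstat is limited to kernel/sched). Purely as a rough illustration of the same startup/teardown convention -- not part of this patch, and every name below is made up -- an ordinary user would register against the state machine roughly like this:

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/printk.h>

/* Hypothetical per-CPU bring-up hook, run on each CPU as it comes online. */
static int example_cpu_online(unsigned int cpu)
{
	pr_info("example: CPU%u online\n", cpu);
	return 0;	/* a negative return would abort the bring-up */
}

/* Hypothetical teardown hook, run before the CPU is taken down. */
static int example_cpu_offline(unsigned int cpu)
{
	pr_info("example: CPU%u going offline\n", cpu);
	return 0;
}

static int __init example_hotplug_init(void)
{
	int ret;

	/*
	 * CPUHP_AP_ONLINE_DYN asks for a dynamically allocated state in the
	 * online section; the startup callback is also invoked for CPUs that
	 * are already online. On success the allocated state (> 0) is returned.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
				example_cpu_online, example_cpu_offline);
	return ret < 0 ? ret : 0;
}
device_initcall(example_hotplug_init);

Compared with the old notifier scheme, such callbacks no longer decode an action code; the state machine invokes the startup callback on bring-up and the teardown callback on removal, in a defined order.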
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e09f92c3a096..1e622f254df4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -314,29 +314,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	}
 }
 
-static int
-hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int cpu = (int)(long)hcpu;
-
-	switch (action) {
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		hrtick_clear(cpu_rq(cpu));
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static __init void init_hrtick(void)
-{
-	hotcpu_notifier(hotplug_hrtick, 0);
-}
 #else
 /*
  * Called to set the hrtick timer state.
@@ -353,10 +330,6 @@ void hrtick_start(struct rq *rq, u64 delay)
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
 		      HRTIMER_MODE_REL_PINNED);
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
@@ -380,10 +353,6 @@ static inline void hrtick_clear(struct rq *rq)
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif /* CONFIG_SCHED_HRTICK */
 
 /*
@@ -1150,6 +1119,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
 	struct rq_flags rf;
 	struct rq *rq;
@@ -1157,6 +1127,13 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	rq = task_rq_lock(p, &rf);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1169,18 +1146,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1499,6 +1486,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1517,8 +1523,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1529,8 +1533,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1582,6 +1584,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -5288,6 +5292,8 @@ out:
 
 #ifdef CONFIG_SMP
 
+static bool sched_smp_initialized __read_mostly;
+
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
 int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5503,127 +5509,13 @@ static void set_rq_offline(struct rq *rq)
 	}
 }
 
-/*
- * migration_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static void set_cpu_rq_start_time(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
-	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-
-	case CPU_UP_PREPARE:
-		rq->calc_load_update = calc_load_update;
-		account_reset_rq(rq);
-		break;
-
-	case CPU_ONLINE:
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-
-			set_rq_online(rq);
-		}
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-		sched_ttwu_pending();
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-			set_rq_offline(rq);
-		}
-		migrate_tasks(rq);
-		BUG_ON(rq->nr_running != 1); /* the migration thread */
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-	case CPU_DEAD:
-		calc_load_migrate(rq);
-		break;
-#endif
-	}
-
-	update_max_interval();
-
-	return NOTIFY_OK;
-}
-
-/*
- * Register at high priority so that task migration (migrate_all_tasks)
- * happens before everything else. This has to be lower priority than
- * the notifier in the perf_event subsystem, though.
- */
-static struct notifier_block migration_notifier = {
-	.notifier_call = migration_call,
-	.priority = CPU_PRI_MIGRATION,
-};
-
-static void set_cpu_rq_start_time(void)
-{
-	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
 	rq->age_stamp = sched_clock_cpu(cpu);
 }
 
-static int sched_cpu_active(struct notifier_block *nfb,
-			    unsigned long action, void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-		set_cpu_rq_start_time();
-		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-		set_cpu_active(cpu, true);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
-			      unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int __init migration_init(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	/* Initialize migration for the boot CPU */
-	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
-	BUG_ON(err == NOTIFY_BAD);
-	migration_call(&migration_notifier, CPU_ONLINE, cpu);
-	register_cpu_notifier(&migration_notifier);
-
-	/* Register cpu active notifiers */
-	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
-	return 0;
-}
-early_initcall(migration_init);
-
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -6771,10 +6663,10 @@ static void sched_init_numa(void)
 	init_numa_topology_type();
 }
 
-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
 {
-	int i, j;
 	int node = cpu_to_node(cpu);
+	int i, j;
 
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
@@ -6784,51 +6676,20 @@ static void sched_domains_numa_masks_set(int cpu)
 	}
 }
 
-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
 {
 	int i, j;
+
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++)
 			cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
 	}
 }
 
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		sched_domains_numa_masks_set(cpu);
-		break;
-
-	case CPU_DEAD:
-		sched_domains_numa_masks_clear(cpu);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
 #else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7218,13 +7079,9 @@ static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */
  * If we come here as part of a suspend/resume, don't touch cpusets because we
  * want to restore it back to its original state upon resume anyway.
  */
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
-			     void *hcpu)
+static void cpuset_cpu_active(void)
 {
-	switch (action) {
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED_FROZEN:
-
+	if (cpuhp_tasks_frozen) {
 		/*
 		 * num_cpus_frozen tracks how many CPUs are involved in suspend
 		 * resume sequence. As long as this is not the last online
@@ -7234,35 +7091,25 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 		num_cpus_frozen--;
 		if (likely(num_cpus_frozen)) {
 			partition_sched_domains(1, NULL, NULL);
-			break;
+			return;
 		}
-
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
-
-	case CPU_ONLINE:
-		cpuset_update_active_cpus(true);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	cpuset_update_active_cpus(true);
 }
 
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
-			       void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu)
 {
 	unsigned long flags;
-	long cpu = (long)hcpu;
 	struct dl_bw *dl_b;
 	bool overflow;
 	int cpus;
 
-	switch (action) {
-	case CPU_DOWN_PREPARE:
+	if (!cpuhp_tasks_frozen) {
 		rcu_read_lock_sched();
 		dl_b = dl_bw_of(cpu);
 
@@ -7274,19 +7121,120 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
 		rcu_read_unlock_sched();
 
 		if (overflow)
-			return notifier_from_errno(-EBUSY);
+			return -EBUSY;
 		cpuset_update_active_cpus(false);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
+	} else {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized) {
+		sched_domains_numa_masks_set(cpu);
+		cpuset_cpu_active();
+	}
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all cpus have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_online(rq);
+	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	update_max_interval();
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+	 * not imply sync_sched(), so wait for both.
+	 *
+	 * Do sync before park smpboot threads to take care the rcu boost case.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		synchronize_rcu_mult(call_rcu, call_rcu_sched);
+	else
+		synchronize_rcu();
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->calc_load_update = calc_load_update;
+	account_reset_rq(rq);
+	update_max_interval();
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+	set_cpu_rq_start_time(cpu);
+	sched_rq_cpu_starting(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/* Handle pending wakeups and then migrate everything off */
+	sched_ttwu_pending();
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	migrate_tasks(rq);
+	BUG_ON(rq->nr_running != 1);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	calc_load_migrate(rq);
+	update_max_interval();
+	nohz_balance_exit_idle(cpu);
+	hrtick_clear(rq);
+	return 0;
+}
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -7308,12 +7256,6 @@ void __init sched_init_smp(void)
 	cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 
-	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
-	init_hrtick();
-
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
@@ -7322,7 +7264,16 @@
 
 	init_sched_rt_class();
 	init_sched_dl_class();
+	sched_smp_initialized = true;
+}
+
+static int __init migration_init(void)
+{
+	sched_rq_cpu_starting(smp_processor_id());
+	return 0;
 }
+early_initcall(migration_init);
+
 #else
 void __init sched_init_smp(void)
 {
@@ -7519,7 +7470,7 @@ void __init sched_init(void)
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
-	set_cpu_rq_start_time();
+	set_cpu_rq_start_time(smp_processor_id());
 #endif
 	init_sched_fair_class();
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 39fde3660f97..84e465ae7c63 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7814,7 +7814,7 @@ static void nohz_balancer_kick(void)
 	return;
 }
 
-static inline void nohz_balance_exit_idle(int cpu)
+void nohz_balance_exit_idle(unsigned int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		/*
@@ -7887,18 +7887,6 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
-
-static int sched_ilb_notifier(struct notifier_block *nfb,
-			      unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DYING:
-		nohz_balance_exit_idle(smp_processor_id());
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -8704,7 +8692,6 @@ __init void init_sched_fair_class(void)
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d24e91b0a722..ab6adb159e23 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1700,6 +1700,10 @@ enum rq_nohz_flag_bits {
 };
 
 #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING