32 files changed, 154 insertions, 518 deletions
diff --git a/Documentation/scheduler/sched-pelt.c b/Documentation/scheduler/sched-pelt.c
index e4219139386a..7238b355919c 100644
--- a/Documentation/scheduler/sched-pelt.c
+++ b/Documentation/scheduler/sched-pelt.c
@@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
     int i;
     unsigned int x;
 
-    printf("static const u32 runnable_avg_yN_inv[] = {");
+    /* To silence -Wunused-but-set-variable warnings. */
+    printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
     for (i = 0; i < HALFLIFE; i++) {
         x = ((1UL<<32)-1)*pow(y, i);
 
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d761854b..79190d877fa7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
     ti->cpu = cpu;
     p->stack = ti;
     p->state = TASK_UNINTERRUPTIBLE;
-    cpumask_set_cpu(cpu, &p->cpus_allowed);
+    cpumask_set_cpu(cpu, &p->cpus_mask);
     INIT_LIST_HEAD(&p->tasks);
     p->parent = p->real_parent = p->group_leader = p;
     INIT_LIST_HEAD(&p->children);
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0f813bb753c6..09cbe9042828 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
  * inline to try to keep the overhead down. If we have been forced to run on
  * a "CPU" with an FPU because of a previous high level of FP computation,
  * but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
  *
  * We're not calling set_cpus_allowed() here, because we have no need to
  * force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
         test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
         (!(KSTK_STATUS(prev) & ST0_CU1))) { \
         clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
-        prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+        prev->cpus_mask = prev->thread.user_cpus_allowed; \
     } \
     next->thread.emulated_fp = 0; \
 } while(0)
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index a7c0f97e4b0d..1a08428eedcf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
     if (retval)
         goto out_unlock;
 
-    cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+    cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
     cpumask_and(&mask, &allowed, cpu_active_mask);
 
 out_unlock:
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c52766a5b85f..ac7159263da0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
      * restricted the allowed set to exclude any CPUs with FPUs,
      * we'll skip the procedure.
      */
-    if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+    if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
         cpumask_t tmask;
 
         current->thread.user_cpus_allowed
-            = current->cpus_allowed;
-        cpumask_and(&tmask, &current->cpus_allowed,
+            = current->cpus_mask;
+        cpumask_and(&tmask, &current->cpus_mask,
                     &mt_fpu_cpumask);
         set_cpus_allowed_ptr(current, &tmask);
         set_thread_flag(TIF_FPUBOUND);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e56b553de27b..f18d5067cd0f 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
      * runqueue. The context will be rescheduled on the proper node
      * if it is timesliced or preempted.
      */
-    cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+    cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
 
     /* Save the current cpu id for spu interrupt routing. */
     ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 604c0e3bcc83..f68baccc69f0 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
      * may be scheduled elsewhere and invalidate entries in the
      * pseudo-locked region.
      */
-    if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+    if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
         mutex_unlock(&rdtgroup_mutex);
         return -EINVAL;
     }
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4fe662c3bbc1..c142b23bb401 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node)
     struct hfi1_affinity_node *entry;
     cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
     const struct cpumask *node_mask,
-        *proc_mask = &current->cpus_allowed;
+        *proc_mask = current->cpus_ptr;
     struct hfi1_affinity_node_list *affinity = &node_affinity;
     struct cpu_mask_set *set = &affinity->proc;
 
@@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node)
      * check whether process/context affinity has already
      * been set
      */
-    if (cpumask_weight(proc_mask) == 1) {
+    if (current->nr_cpus_allowed == 1) {
         hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
                   current->pid, current->comm,
                   cpumask_pr_args(proc_mask));
@@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node)
         cpu = cpumask_first(proc_mask);
         cpumask_set_cpu(cpu, &set->used);
         goto done;
-    } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+    } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
         hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
                   current->pid, current->comm,
                   cpumask_pr_args(proc_mask));
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 28b66bd70b74..2395fd4233a7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -869,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
 {
     struct sdma_rht_node *rht_node;
     struct sdma_engine *sde = NULL;
-    const struct cpumask *current_mask = &current->cpus_allowed;
     unsigned long cpu_id;
 
     /*
      * To ensure that always the same sdma engine(s) will be
      * selected make sure the process is pinned to this CPU only.
      */
-    if (cpumask_weight(current_mask) != 1)
+    if (current->nr_cpus_allowed != 1)
         goto out;
 
     cpu_id = smp_processor_id();
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 78fa634de98a..27b6e664e59d 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
 static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
 {
     struct qib_filedata *fd = fp->private_data;
-    const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+    const unsigned int weight = current->nr_cpus_allowed;
     const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
     int local_cpu;
 
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
         ret = find_free_ctxt(i_minor - 1, fp, uinfo);
     else {
         int unit;
-        const unsigned int cpu = cpumask_first(&current->cpus_allowed);
-        const unsigned int weight =
-            cpumask_weight(&current->cpus_allowed);
+        const unsigned int cpu = cpumask_first(current->cpus_ptr);
+        const unsigned int weight = current->nr_cpus_allowed;
 
         if (weight == 1 && !test_bit(cpu, qib_cpulist))
             if (!find_hca(cpu, &unit) && unit >= 0)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2edbb657f859..84908556ea58 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
     seq_printf(m, "Cpus_allowed:\t%*pb\n",
-               cpumask_pr_args(&task->cpus_allowed));
+               cpumask_pr_args(task->cpus_ptr));
     seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
-               cpumask_pr_args(&task->cpus_allowed));
+               cpumask_pr_args(task->cpus_ptr));
 }
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..1b2590a8d038 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -651,7 +651,8 @@ struct task_struct {
 
     unsigned int policy;
     int nr_cpus_allowed;
-    cpumask_t cpus_allowed;
+    const cpumask_t *cpus_ptr;
+    cpumask_t cpus_mask;
 
 #ifdef CONFIG_PREEMPT_RCU
     int rcu_read_lock_nesting;
@@ -1399,7 +1400,7 @@ extern struct pid *cad_pid;
 #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
 #define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
 #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
 #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
 #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index b36f4cf38111..1abe91ff6e4a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -7,14 +7,6 @@
  */
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void cpu_load_update_nohz_start(void);
-extern void cpu_load_update_nohz_stop(void);
-#else
-static inline void cpu_load_update_nohz_start(void) { }
-static inline void cpu_load_update_nohz_stop(void) { }
-#endif
-
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void nohz_balance_enter_idle(int cpu);
 extern int get_nohz_timer_target(void);
 #else
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index cfc0a89a7159..53afbe07354a 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -84,11 +84,6 @@ struct sched_domain {
     unsigned int busy_factor; /* less balancing by factor if busy */
     unsigned int imbalance_pct; /* No balance until over watermark */
     unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
-    unsigned int busy_idx;
-    unsigned int idle_idx;
-    unsigned int newidle_idx;
-    unsigned int wake_idx;
-    unsigned int forkexec_idx;
 
     int nohz_idle; /* NOHZ IDLE status */
     int flags; /* See SD_* */
diff --git a/init/init_task.c b/init/init_task.c
index c70ef656d0f4..3c27c0efa316 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -72,7 +72,8 @@ struct task_struct init_task
     .static_prio = MAX_PRIO - 20,
     .normal_prio = MAX_PRIO - 20,
     .policy = SCHED_NORMAL,
-    .cpus_allowed = CPU_MASK_ALL,
+    .cpus_ptr = &init_task.cpus_mask,
+    .cpus_mask = CPU_MASK_ALL,
     .nr_cpus_allowed= NR_CPUS,
     .mm = NULL,
     .active_mm = &init_mm,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 515525ff1cfd..a1590e244f5f 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
     if (task_css_is_root(task, cpuset_cgrp_id))
         return;
 
-    set_cpus_allowed_ptr(task, &current->cpus_allowed);
+    set_cpus_allowed_ptr(task, current->cpus_ptr);
     task->mems_allowed = current->mems_allowed;
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 75675b9bf6df..6be686283e55 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -894,6 +894,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_STACKPROTECTOR
     tsk->stack_canary = get_random_canary();
 #endif
+    if (orig->cpus_ptr == &orig->cpus_mask)
+        tsk->cpus_ptr = &tsk->cpus_mask;
 
     /*
      * One for us, one for whoever does the "release_task()" (usually
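Editor's note: taken together, the include/linux/sched.h, init/init_task.c and kernel/fork.c hunks above replace the single task_struct::cpus_allowed field with a cpus_mask field plus a cpus_ptr pointer. The affinity mask itself lives in cpus_mask, readers dereference cpus_ptr, and cpus_ptr normally points back at the task's own cpus_mask (dup_task_struct() re-points it for the child only when it still referred to the parent's own mask). A minimal sketch of the resulting convention follows; the demo_* helpers are hypothetical names used purely for illustration, not something this series adds.

#include <linux/cpumask.h>
#include <linux/sched.h>

/* Hypothetical helpers -- illustration of the access pattern only. */
static inline bool demo_task_allowed_on(struct task_struct *p, int cpu)
{
    /* Readers always go through the pointer. */
    return cpumask_test_cpu(cpu, p->cpus_ptr);
}

static inline void demo_set_affinity_locked(struct task_struct *p,
                                            const struct cpumask *new_mask)
{
    /* Writers update the mask itself; real callers hold pi_lock/rq->lock. */
    cpumask_copy(&p->cpus_mask, new_mask);
    p->nr_cpus_allowed = cpumask_weight(new_mask);
}

The write side mirrors set_cpus_allowed_common() in the kernel/sched/core.c hunks below; code that wants the user-set mask regardless of where cpus_ptr currently points, such as sched_getaffinity(), keeps using cpus_mask directly.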
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 874c427742a9..83bd6bb32a34 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -930,7 +930,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
  */
 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 {
-    if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+    if (!cpumask_test_cpu(cpu, p->cpus_ptr))
         return false;
 
     if (is_per_cpu_kthread(p))
@@ -1025,7 +1025,7 @@ static int migration_cpu_stop(void *data)
     local_irq_disable();
     /*
      * We need to explicitly wake pending tasks before running
-     * __migrate_task() such that we will not miss enforcing cpus_allowed
+     * __migrate_task() such that we will not miss enforcing cpus_ptr
      * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
      */
     sched_ttwu_pending();
@@ -1056,7 +1056,7 @@ static int migration_cpu_stop(void *data)
  */
 void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
 {
-    cpumask_copy(&p->cpus_allowed, new_mask);
+    cpumask_copy(&p->cpus_mask, new_mask);
     p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
@@ -1126,7 +1126,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
         goto out;
     }
 
-    if (cpumask_equal(&p->cpus_allowed, new_mask))
+    if (cpumask_equal(p->cpus_ptr, new_mask))
         goto out;
 
     if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1286,10 +1286,10 @@ static int migrate_swap_stop(void *data)
     if (task_cpu(arg->src_task) != arg->src_cpu)
         goto unlock;
 
-    if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
+    if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
         goto unlock;
 
-    if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
+    if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
         goto unlock;
 
     __migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1331,10 +1331,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
     if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
         goto out;
 
-    if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
+    if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
         goto out;
 
-    if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
+    if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
         goto out;
 
     trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1479,7 +1479,7 @@ void kick_process(struct task_struct *p)
 EXPORT_SYMBOL_GPL(kick_process);
 
 /*
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
  *
  * A few notes on cpu_active vs cpu_online:
 *
@@ -1519,14 +1519,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
         for_each_cpu(dest_cpu, nodemask) {
             if (!cpu_active(dest_cpu))
                 continue;
-            if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+            if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
                 return dest_cpu;
         }
     }
 
     for (;;) {
         /* Any allowed, online CPU? */
-        for_each_cpu(dest_cpu, &p->cpus_allowed) {
+        for_each_cpu(dest_cpu, p->cpus_ptr) {
             if (!is_cpu_allowed(p, dest_cpu))
                 continue;
 
@@ -1570,7 +1570,7 @@ out:
 }
 
 /*
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
  */
 static inline
 int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1580,11 +1580,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
     if (p->nr_cpus_allowed > 1)
         cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
     else
-        cpu = cpumask_any(&p->cpus_allowed);
+        cpu = cpumask_any(p->cpus_ptr);
 
     /*
      * In order not to call set_task_cpu() on a blocking task we need
-     * to rely on ttwu() to place the task on a valid ->cpus_allowed
+     * to rely on ttwu() to place the task on a valid ->cpus_ptr
      * CPU.
      *
      * Since this is common to all placement strategies, this lives here.
@@ -1991,6 +1991,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
     unsigned long flags;
     int cpu, success = 0;
 
+    if (p == current) {
+        /*
+         * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
+         * == smp_processor_id()'. Together this means we can special
+         * case the whole 'p->on_rq && ttwu_remote()' case below
+         * without taking any locks.
+         *
+         * In particular:
+         *  - we rely on Program-Order guarantees for all the ordering,
+         *  - we're serialized against set_special_state() by virtue of
+         *    it disabling IRQs (this allows not taking ->pi_lock).
+         */
+        if (!(p->state & state))
+            return false;
+
+        success = 1;
+        cpu = task_cpu(p);
+        trace_sched_waking(p);
+        p->state = TASK_RUNNING;
+        trace_sched_wakeup(p);
+        goto out;
+    }
+
     /*
      * If we are going to wake up a thread waiting for CONDITION we
      * need to ensure that CONDITION=1 done by the caller can not be
@@ -2000,7 +2023,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
     raw_spin_lock_irqsave(&p->pi_lock, flags);
     smp_mb__after_spinlock();
     if (!(p->state & state))
-        goto out;
+        goto unlock;
 
     trace_sched_waking(p);
 
@@ -2030,7 +2053,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
      */
     smp_rmb();
     if (p->on_rq && ttwu_remote(p, wake_flags))
-        goto stat;
+        goto unlock;
 
 #ifdef CONFIG_SMP
     /*
@@ -2090,10 +2113,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 #endif /* CONFIG_SMP */
 
     ttwu_queue(p, cpu, wake_flags);
-stat:
-    ttwu_stat(p, cpu, wake_flags);
-out:
+unlock:
     raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+out:
+    if (success)
+        ttwu_stat(p, cpu, wake_flags);
 
     return success;
 }
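Editor's note: the try_to_wake_up() hunk above adds a lock-free fast path for waking the task that is already running on this CPU. A hedged sketch of when that path fires, using hypothetical names (demo_wait(), demo_irq(), demo_event and demo_waiter are illustrative, not part of the patch): an interrupt on the same CPU delivers the wake-up after the task has set TASK_INTERRUPTIBLE but before it has called schedule(), so try_to_wake_up() runs with p == current and can reset p->state to TASK_RUNNING without taking ->pi_lock.

#include <linux/interrupt.h>
#include <linux/sched.h>

static struct task_struct *demo_waiter; /* assumed: published by demo_wait() */
static bool demo_event;                 /* assumed: the condition being waited for */

static irqreturn_t demo_irq(int irq, void *data)
{
    demo_event = true;
    /*
     * If this IRQ preempts demo_wait() on the same CPU between
     * set_current_state() and schedule(), the wake-up below reaches
     * try_to_wake_up() with p == current and takes the new fast path.
     */
    if (demo_waiter)
        wake_up_process(demo_waiter);
    return IRQ_HANDLED;
}

static void demo_wait(void)
{
    demo_waiter = current;
    for (;;) {
        set_current_state(TASK_INTERRUPTIBLE);
        if (demo_event)
            break;
        schedule();
    }
    __set_current_state(TASK_RUNNING);
    demo_waiter = NULL;
}

Moving ttwu_stat() after the unlock and gating it on success keeps the statistics update common to both the fast path and the locked path.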
@@ -2395,7 +2419,7 @@ void wake_up_new_task(struct task_struct *p)
 #ifdef CONFIG_SMP
     /*
      * Fork balancing, do it here and not earlier because:
-     * - cpus_allowed can change in the fork path
+     * - cpus_ptr can change in the fork path
      * - any previously selected CPU might disappear through hotplug
      *
      * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -3033,7 +3057,6 @@ void scheduler_tick(void)
 
     update_rq_clock(rq);
     curr->sched_class->task_tick(rq, curr, 0);
-    cpu_load_update_active(rq);
     calc_global_load_tick(rq);
     psi_task_tick(rq);
 
@@ -4267,7 +4290,7 @@ change:
          * the entire root_domain to become SCHED_DEADLINE. We
          * will also fail if there's no bandwidth available.
          */
-        if (!cpumask_subset(span, &p->cpus_allowed) ||
+        if (!cpumask_subset(span, p->cpus_ptr) ||
             rq->rd->dl_bw.bw == 0) {
             task_rq_unlock(rq, p, &rf);
             return -EPERM;
@@ -4866,7 +4889,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
         goto out_unlock;
 
     raw_spin_lock_irqsave(&p->pi_lock, flags);
-    cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+    cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
     raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
@@ -5123,7 +5146,7 @@ long __sched io_schedule_timeout(long timeout)
 }
 EXPORT_SYMBOL(io_schedule_timeout);
 
-void io_schedule(void)
+void __sched io_schedule(void)
 {
     int token;
 
@@ -5443,7 +5466,7 @@ int task_can_attach(struct task_struct *p,
      * allowed nodes is unnecessary. Thus, cpusets are not
      * applicable for such threads. This prevents checking for
      * success of set_cpus_allowed_ptr() on all attached tasks
-     * before cpus_allowed may be changed.
+     * before cpus_mask may be changed.
      */
     if (p->flags & PF_NO_SETAFFINITY) {
         ret = -EINVAL;
@@ -5470,7 +5493,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
     if (curr_cpu == target_cpu)
         return 0;
 
-    if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
+    if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
         return -EINVAL;
 
     /* TODO: This is not properly updating schedstats */
@@ -5608,7 +5631,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
         put_prev_task(rq, next);
 
         /*
-         * Rules for changing task_struct::cpus_allowed are holding
+         * Rules for changing task_struct::cpus_mask are holding
          * both pi_lock and rq->lock, such that holding either
          * stabilizes the mask.
          *
@@ -5902,8 +5925,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
 void __init sched_init(void)
 {
-    int i, j;
     unsigned long alloc_size = 0, ptr;
+    int i;
 
     wait_bit_init();
 
@@ -6005,10 +6028,6 @@ void __init sched_init(void)
 #ifdef CONFIG_RT_GROUP_SCHED
         init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
-
-        for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
-            rq->cpu_load[j] = 0;
-
 #ifdef CONFIG_SMP
         rq->sd = NULL;
         rq->rd = NULL;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index ec4e4a9aab5f..5cc4012572ec 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
     const struct sched_dl_entity *dl_se = &p->dl;
 
     if (later_mask &&
-        cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+        cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
         return 1;
     } else {
         int best_cpu = cpudl_maximum(cp);
 
         WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
 
-        if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+        if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
             dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
             if (later_mask)
                 cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 9c6480e6d62d..b7abca987d94 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
         if (skip)
             continue;
 
-        if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+        if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
             continue;
 
         if (lowest_mask) {
-            cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+            cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
 
             /*
              * We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 43901fa3f269..c1ef30861068 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
          * If we cannot preempt any rq, fall back to pick any
          * online CPU:
          */
-        cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+        cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
         if (cpu >= nr_cpu_ids) {
             /*
              * Failed to find any suitable CPU.
@@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq)
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
     if (!task_running(rq, p) &&
-        cpumask_test_cpu(cpu, &p->cpus_allowed))
+        cpumask_test_cpu(cpu, p->cpus_ptr))
         return 1;
     return 0;
 }
@@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
         /* Retry if something changed. */
         if (double_lock_balance(rq, later_rq)) {
             if (unlikely(task_rq(task) != rq ||
-                         !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
+                         !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
                          task_running(rq, task) ||
                          !dl_task(task) ||
                          !task_on_rq_queued(task))) {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 14c6a8716ba1..f7e4579e746c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -233,49 +233,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
     *tablep = NULL;
 }
 
-static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX-1;
-
 static void
 set_table_entry(struct ctl_table *entry,
                 const char *procname, void *data, int maxlen,
-                umode_t mode, proc_handler *proc_handler,
-                bool load_idx)
+                umode_t mode, proc_handler *proc_handler)
 {
     entry->procname = procname;
     entry->data = data;
     entry->maxlen = maxlen;
     entry->mode = mode;
     entry->proc_handler = proc_handler;
-
-    if (load_idx) {
-        entry->extra1 = &min_load_idx;
-        entry->extra2 = &max_load_idx;
-    }
 }
 
 static struct ctl_table *
 sd_alloc_ctl_domain_table(struct sched_domain *sd)
 {
-    struct ctl_table *table = sd_alloc_ctl_entry(14);
+    struct ctl_table *table = sd_alloc_ctl_entry(9);
 
     if (table == NULL)
         return NULL;
 
-    set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
-    set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
-    set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
-    set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
-    set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
-    set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
-    set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
-    set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false);
-    set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false);
-    set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false);
-    set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false);
-    set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
-    set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-    /* &table[13] is terminator */
+    set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+    set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+    set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
+    set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
+    set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
+    set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax);
+    set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
+    set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
+    /* &table[8] is terminator */
 
     return table;
 }
@@ -653,8 +639,6 @@ do { \
     SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
 
     P(nr_running);
-    SEQ_printf(m, " .%-30s: %lu\n", "load",
-               rq->load.weight);
     P(nr_switches);
     P(nr_load_updates);
     P(nr_uninterruptible);
@@ -662,11 +646,6 @@ do { \
     SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
     PN(clock);
     PN(clock_task);
-    P(cpu_load[0]);
-    P(cpu_load[1]);
-    P(cpu_load[2]);
-    P(cpu_load[3]);
-    P(cpu_load[4]);
 #undef P
 #undef PN
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f35930f5e528..3c11dcdedcbc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1467,8 +1467,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, | |||
1467 | } | 1467 | } |
1468 | 1468 | ||
1469 | static unsigned long weighted_cpuload(struct rq *rq); | 1469 | static unsigned long weighted_cpuload(struct rq *rq); |
1470 | static unsigned long source_load(int cpu, int type); | ||
1471 | static unsigned long target_load(int cpu, int type); | ||
1472 | 1470 | ||
1473 | /* Cached statistics for all CPUs within a node */ | 1471 | /* Cached statistics for all CPUs within a node */ |
1474 | struct numa_stats { | 1472 | struct numa_stats { |
@@ -1621,7 +1619,7 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1621 | * be incurred if the tasks were swapped. | 1619 | * be incurred if the tasks were swapped. |
1622 | */ | 1620 | */ |
1623 | /* Skip this swap candidate if cannot move to the source cpu */ | 1621 | /* Skip this swap candidate if cannot move to the source cpu */ |
1624 | if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) | 1622 | if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr)) |
1625 | goto unlock; | 1623 | goto unlock; |
1626 | 1624 | ||
1627 | /* | 1625 | /* |
@@ -1718,7 +1716,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, | |||
1718 | 1716 | ||
1719 | for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { | 1717 | for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { |
1720 | /* Skip this CPU if the source task cannot migrate */ | 1718 | /* Skip this CPU if the source task cannot migrate */ |
1721 | if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) | 1719 | if (!cpumask_test_cpu(cpu, env->p->cpus_ptr)) |
1722 | continue; | 1720 | continue; |
1723 | 1721 | ||
1724 | env->dst_cpu = cpu; | 1722 | env->dst_cpu = cpu; |
@@ -2686,8 +2684,6 @@ static void | |||
2686 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 2684 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
2687 | { | 2685 | { |
2688 | update_load_add(&cfs_rq->load, se->load.weight); | 2686 | update_load_add(&cfs_rq->load, se->load.weight); |
2689 | if (!parent_entity(se)) | ||
2690 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); | ||
2691 | #ifdef CONFIG_SMP | 2687 | #ifdef CONFIG_SMP |
2692 | if (entity_is_task(se)) { | 2688 | if (entity_is_task(se)) { |
2693 | struct rq *rq = rq_of(cfs_rq); | 2689 | struct rq *rq = rq_of(cfs_rq); |
@@ -2703,8 +2699,6 @@ static void | |||
2703 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 2699 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
2704 | { | 2700 | { |
2705 | update_load_sub(&cfs_rq->load, se->load.weight); | 2701 | update_load_sub(&cfs_rq->load, se->load.weight); |
2706 | if (!parent_entity(se)) | ||
2707 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); | ||
2708 | #ifdef CONFIG_SMP | 2702 | #ifdef CONFIG_SMP |
2709 | if (entity_is_task(se)) { | 2703 | if (entity_is_task(se)) { |
2710 | account_numa_dequeue(rq_of(cfs_rq), task_of(se)); | 2704 | account_numa_dequeue(rq_of(cfs_rq), task_of(se)); |
@@ -4100,7 +4094,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
4100 | * least twice that of our own weight (i.e. dont track it | 4094 | * least twice that of our own weight (i.e. dont track it |
4101 | * when there are only lesser-weight tasks around): | 4095 | * when there are only lesser-weight tasks around): |
4102 | */ | 4096 | */ |
4103 | if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) { | 4097 | if (schedstat_enabled() && |
4098 | rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) { | ||
4104 | schedstat_set(se->statistics.slice_max, | 4099 | schedstat_set(se->statistics.slice_max, |
4105 | max((u64)schedstat_val(se->statistics.slice_max), | 4100 | max((u64)schedstat_val(se->statistics.slice_max), |
4106 | se->sum_exec_runtime - se->prev_sum_exec_runtime)); | 4101 | se->sum_exec_runtime - se->prev_sum_exec_runtime)); |
@@ -4734,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b) | |||
4734 | if (runtime_refresh_within(cfs_b, min_left)) | 4729 | if (runtime_refresh_within(cfs_b, min_left)) |
4735 | return; | 4730 | return; |
4736 | 4731 | ||
4732 | /* don't push forwards an existing deferred unthrottle */ | ||
4733 | if (cfs_b->slack_started) | ||
4734 | return; | ||
4735 | cfs_b->slack_started = true; | ||
4736 | |||
4737 | hrtimer_start(&cfs_b->slack_timer, | 4737 | hrtimer_start(&cfs_b->slack_timer, |
4738 | ns_to_ktime(cfs_bandwidth_slack_period), | 4738 | ns_to_ktime(cfs_bandwidth_slack_period), |
4739 | HRTIMER_MODE_REL); | 4739 | HRTIMER_MODE_REL); |
@@ -4787,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) | |||
4787 | 4787 | ||
4788 | /* confirm we're still not at a refresh boundary */ | 4788 | /* confirm we're still not at a refresh boundary */ |
4789 | raw_spin_lock_irqsave(&cfs_b->lock, flags); | 4789 | raw_spin_lock_irqsave(&cfs_b->lock, flags); |
4790 | cfs_b->slack_started = false; | ||
4790 | if (cfs_b->distribute_running) { | 4791 | if (cfs_b->distribute_running) { |
4791 | raw_spin_unlock_irqrestore(&cfs_b->lock, flags); | 4792 | raw_spin_unlock_irqrestore(&cfs_b->lock, flags); |
4792 | return; | 4793 | return; |
@@ -4950,6 +4951,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | |||
4950 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 4951 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4951 | cfs_b->slack_timer.function = sched_cfs_slack_timer; | 4952 | cfs_b->slack_timer.function = sched_cfs_slack_timer; |
4952 | cfs_b->distribute_running = 0; | 4953 | cfs_b->distribute_running = 0; |
4954 | cfs_b->slack_started = false; | ||
4953 | } | 4955 | } |
4954 | 4956 | ||
4955 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) | 4957 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) |
@@ -5325,71 +5327,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); | |||
5325 | DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); | 5327 | DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); |
5326 | 5328 | ||
5327 | #ifdef CONFIG_NO_HZ_COMMON | 5329 | #ifdef CONFIG_NO_HZ_COMMON |
5328 | /* | ||
5329 | * per rq 'load' arrray crap; XXX kill this. | ||
5330 | */ | ||
5331 | |||
5332 | /* | ||
5333 | * The exact cpuload calculated at every tick would be: | ||
5334 | * | ||
5335 | * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load | ||
5336 | * | ||
5337 | * If a CPU misses updates for n ticks (as it was idle) and update gets | ||
5338 | * called on the n+1-th tick when CPU may be busy, then we have: | ||
5339 | * | ||
5340 | * load_n = (1 - 1/2^i)^n * load_0 | ||
5341 | * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load | ||
5342 | * | ||
5343 | * decay_load_missed() below does efficient calculation of | ||
5344 | * | ||
5345 | * load' = (1 - 1/2^i)^n * load | ||
5346 | * | ||
5347 | * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors. | ||
5348 | * This allows us to precompute the above in said factors, thereby allowing the | ||
5349 | * reduction of an arbitrary n in O(log_2 n) steps. (See also | ||
5350 | * fixed_power_int()) | ||
5351 | * | ||
5352 | * The calculation is approximated on a 128 point scale. | ||
5353 | */ | ||
5354 | #define DEGRADE_SHIFT 7 | ||
5355 | |||
5356 | static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128}; | ||
5357 | static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = { | ||
5358 | { 0, 0, 0, 0, 0, 0, 0, 0 }, | ||
5359 | { 64, 32, 8, 0, 0, 0, 0, 0 }, | ||
5360 | { 96, 72, 40, 12, 1, 0, 0, 0 }, | ||
5361 | { 112, 98, 75, 43, 15, 1, 0, 0 }, | ||
5362 | { 120, 112, 98, 76, 45, 16, 2, 0 } | ||
5363 | }; | ||
5364 | |||
5365 | /* | ||
5366 | * Update cpu_load for any missed ticks, due to tickless idle. The backlog | ||
5367 | * would be when CPU is idle and so we just decay the old load without | ||
5368 | * adding any new load. | ||
5369 | */ | ||
5370 | static unsigned long | ||
5371 | decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) | ||
5372 | { | ||
5373 | int j = 0; | ||
5374 | |||
5375 | if (!missed_updates) | ||
5376 | return load; | ||
5377 | |||
5378 | if (missed_updates >= degrade_zero_ticks[idx]) | ||
5379 | return 0; | ||
5380 | |||
5381 | if (idx == 1) | ||
5382 | return load >> missed_updates; | ||
5383 | |||
5384 | while (missed_updates) { | ||
5385 | if (missed_updates % 2) | ||
5386 | load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT; | ||
5387 | |||
5388 | missed_updates >>= 1; | ||
5389 | j++; | ||
5390 | } | ||
5391 | return load; | ||
5392 | } | ||
5393 | 5330 | ||
5394 | static struct { | 5331 | static struct { |
5395 | cpumask_var_t idle_cpus_mask; | 5332 | cpumask_var_t idle_cpus_mask; |
@@ -5401,234 +5338,11 @@ static struct { | |||
5401 | 5338 | ||
5402 | #endif /* CONFIG_NO_HZ_COMMON */ | 5339 | #endif /* CONFIG_NO_HZ_COMMON */ |
5403 | 5340 | ||
5404 | /** | ||
5405 | * __cpu_load_update - update the rq->cpu_load[] statistics | ||
5406 | * @this_rq: The rq to update statistics for | ||
5407 | * @this_load: The current load | ||
5408 | * @pending_updates: The number of missed updates | ||
5409 | * | ||
5410 | * Update rq->cpu_load[] statistics. This function is usually called every | ||
5411 | * scheduler tick (TICK_NSEC). | ||
5412 | * | ||
5413 | * This function computes a decaying average: | ||
5414 | * | ||
5415 | * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load | ||
5416 | * | ||
5417 | * Because of NOHZ it might not get called on every tick which gives need for | ||
5418 | * the @pending_updates argument. | ||
5419 | * | ||
5420 | * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1 | ||
5421 | * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load | ||
5422 | * = A * (A * load[i]_n-2 + B) + B | ||
5423 | * = A * (A * (A * load[i]_n-3 + B) + B) + B | ||
5424 | * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B | ||
5425 | * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B | ||
5426 | * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B | ||
5427 | * = (1 - 1/2^i)^n * (load[i]_0 - load) + load | ||
5428 | * | ||
5429 | * In the above we've assumed load_n := load, which is true for NOHZ_FULL as | ||
5430 | * any change in load would have resulted in the tick being turned back on. | ||
5431 | * | ||
5432 | * For regular NOHZ, this reduces to: | ||
5433 | * | ||
5434 | * load[i]_n = (1 - 1/2^i)^n * load[i]_0 | ||
5435 | * | ||
5436 | * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra | ||
5437 | * term. | ||
5438 | */ | ||
5439 | static void cpu_load_update(struct rq *this_rq, unsigned long this_load, | ||
5440 | unsigned long pending_updates) | ||
5441 | { | ||
5442 | unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0]; | ||
5443 | int i, scale; | ||
5444 | |||
5445 | this_rq->nr_load_updates++; | ||
5446 | |||
5447 | /* Update our load: */ | ||
5448 | this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */ | ||
5449 | for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { | ||
5450 | unsigned long old_load, new_load; | ||
5451 | |||
5452 | /* scale is effectively 1 << i now, and >> i divides by scale */ | ||
5453 | |||
5454 | old_load = this_rq->cpu_load[i]; | ||
5455 | #ifdef CONFIG_NO_HZ_COMMON | ||
5456 | old_load = decay_load_missed(old_load, pending_updates - 1, i); | ||
5457 | if (tickless_load) { | ||
5458 | old_load -= decay_load_missed(tickless_load, pending_updates - 1, i); | ||
5459 | /* | ||
5460 | * old_load can never be a negative value because a | ||
5461 | * decayed tickless_load cannot be greater than the | ||
5462 | * original tickless_load. | ||
5463 | */ | ||
5464 | old_load += tickless_load; | ||
5465 | } | ||
5466 | #endif | ||
5467 | new_load = this_load; | ||
5468 | /* | ||
5469 | * Round up the averaging division if load is increasing. This | ||
5470 | * prevents us from getting stuck on 9 if the load is 10, for | ||
5471 | * example. | ||
5472 | */ | ||
5473 | if (new_load > old_load) | ||
5474 | new_load += scale - 1; | ||
5475 | |||
5476 | this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; | ||
5477 | } | ||
5478 | } | ||
5479 | |||
5480 | /* Used instead of source_load when we know the type == 0 */ | ||
5481 | static unsigned long weighted_cpuload(struct rq *rq) | 5341 | static unsigned long weighted_cpuload(struct rq *rq) |
5482 | { | 5342 | { |
5483 | return cfs_rq_runnable_load_avg(&rq->cfs); | 5343 | return cfs_rq_runnable_load_avg(&rq->cfs); |
5484 | } | 5344 | } |
5485 | 5345 | ||
5486 | #ifdef CONFIG_NO_HZ_COMMON | ||
5487 | /* | ||
5488 | * There is no sane way to deal with nohz on smp when using jiffies because the | ||
5489 | * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading | ||
5490 | * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. | ||
5491 | * | ||
5492 | * Therefore we need to avoid the delta approach from the regular tick when | ||
5493 | * possible since that would seriously skew the load calculation. This is why we | ||
5494 | * use cpu_load_update_periodic() for CPUs out of nohz. However, we'll rely on | ||
5495 | * jiffies deltas for updates happening while in nohz mode (idle ticks, idle | ||
5496 | * loop exit, nohz_idle_balance, nohz full exit...) | ||
5497 | * | ||
5498 | * This means we might still be one tick off for nohz periods. | ||
5499 | */ | ||
5500 | |||
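The {0,2}-versus-{1,1} skew described above can be reproduced with made-up jiffies samples: if this CPU's tick reads jiffies just before the remote update once and just after it the next time, the observed per-tick deltas are lopsided even though exactly two ticks elapsed. Illustration only, not kernel code:

#include <stdio.h>

int main(void)
{
	/* jiffies as sampled locally: a baseline and two subsequent ticks */
	unsigned long samples[] = { 100, 100, 102 };
	unsigned long last = samples[0];
	int t;

	for (t = 1; t < 3; t++) {
		printf("tick %d: delta = %lu\n", t, samples[t] - last);
		last = samples[t];
	}
	return 0;	/* deltas come out as 0 and 2 rather than 1 and 1 */
}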
5501 | static void cpu_load_update_nohz(struct rq *this_rq, | ||
5502 | unsigned long curr_jiffies, | ||
5503 | unsigned long load) | ||
5504 | { | ||
5505 | unsigned long pending_updates; | ||
5506 | |||
5507 | pending_updates = curr_jiffies - this_rq->last_load_update_tick; | ||
5508 | if (pending_updates) { | ||
5509 | this_rq->last_load_update_tick = curr_jiffies; | ||
5510 | /* | ||
5511 | * In the regular NOHZ case, we were idle, so the load is 0. | ||
5512 | * In the NOHZ_FULL case, we were non-idle, so we account the | ||
5513 | * CPU's weighted load. | ||
5514 | */ | ||
5515 | cpu_load_update(this_rq, load, pending_updates); | ||
5516 | } | ||
5517 | } | ||
5518 | |||
5519 | /* | ||
5520 | * Called from nohz_idle_balance() to update the load ratings before doing the | ||
5521 | * idle balance. | ||
5522 | */ | ||
5523 | static void cpu_load_update_idle(struct rq *this_rq) | ||
5524 | { | ||
5525 | /* | ||
5526 | * Bail if there's load or we're actually up-to-date. | ||
5527 | */ | ||
5528 | if (weighted_cpuload(this_rq)) | ||
5529 | return; | ||
5530 | |||
5531 | cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0); | ||
5532 | } | ||
5533 | |||
5534 | /* | ||
5535 | * Record the CPU load on nohz entry so we know the tickless load to account | ||
5536 | * on nohz exit. cpu_load[0] then happens to be updated more frequently | ||
5537 | * than the other cpu_load[idx], but that should be fine as cpu_load readers | ||
5538 | * shouldn't rely on synchronized cpu_load[*] updates. | ||
5539 | */ | ||
5540 | void cpu_load_update_nohz_start(void) | ||
5541 | { | ||
5542 | struct rq *this_rq = this_rq(); | ||
5543 | |||
5544 | /* | ||
5545 | * This is all lockless but should be fine. If weighted_cpuload changes | ||
5546 | * concurrently we'll exit nohz. And the cpu_load write can race with | ||
5547 | * cpu_load_update_idle(), but both updaters would be writing the same value. | ||
5548 | */ | ||
5549 | this_rq->cpu_load[0] = weighted_cpuload(this_rq); | ||
5550 | } | ||
5551 | |||
5552 | /* | ||
5553 | * Account the tickless load at the end of a nohz frame. | ||
5554 | */ | ||
5555 | void cpu_load_update_nohz_stop(void) | ||
5556 | { | ||
5557 | unsigned long curr_jiffies = READ_ONCE(jiffies); | ||
5558 | struct rq *this_rq = this_rq(); | ||
5559 | unsigned long load; | ||
5560 | struct rq_flags rf; | ||
5561 | |||
5562 | if (curr_jiffies == this_rq->last_load_update_tick) | ||
5563 | return; | ||
5564 | |||
5565 | load = weighted_cpuload(this_rq); | ||
5566 | rq_lock(this_rq, &rf); | ||
5567 | update_rq_clock(this_rq); | ||
5568 | cpu_load_update_nohz(this_rq, curr_jiffies, load); | ||
5569 | rq_unlock(this_rq, &rf); | ||
5570 | } | ||
5571 | #else /* !CONFIG_NO_HZ_COMMON */ | ||
5572 | static inline void cpu_load_update_nohz(struct rq *this_rq, | ||
5573 | unsigned long curr_jiffies, | ||
5574 | unsigned long load) { } | ||
5575 | #endif /* CONFIG_NO_HZ_COMMON */ | ||
5576 | |||
5577 | static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load) | ||
5578 | { | ||
5579 | #ifdef CONFIG_NO_HZ_COMMON | ||
5580 | /* See the mess around cpu_load_update_nohz(). */ | ||
5581 | this_rq->last_load_update_tick = READ_ONCE(jiffies); | ||
5582 | #endif | ||
5583 | cpu_load_update(this_rq, load, 1); | ||
5584 | } | ||
5585 | |||
5586 | /* | ||
5587 | * Called from scheduler_tick() | ||
5588 | */ | ||
5589 | void cpu_load_update_active(struct rq *this_rq) | ||
5590 | { | ||
5591 | unsigned long load = weighted_cpuload(this_rq); | ||
5592 | |||
5593 | if (tick_nohz_tick_stopped()) | ||
5594 | cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load); | ||
5595 | else | ||
5596 | cpu_load_update_periodic(this_rq, load); | ||
5597 | } | ||
5598 | |||
5599 | /* | ||
5600 | * Return a low guess at the load of a migration-source CPU weighted | ||
5601 | * according to the scheduling class and "nice" value. | ||
5602 | * | ||
5603 | * We want to under-estimate the load of migration sources, to | ||
5604 | * balance conservatively. | ||
5605 | */ | ||
5606 | static unsigned long source_load(int cpu, int type) | ||
5607 | { | ||
5608 | struct rq *rq = cpu_rq(cpu); | ||
5609 | unsigned long total = weighted_cpuload(rq); | ||
5610 | |||
5611 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
5612 | return total; | ||
5613 | |||
5614 | return min(rq->cpu_load[type-1], total); | ||
5615 | } | ||
5616 | |||
5617 | /* | ||
5618 | * Return a high guess at the load of a migration-target CPU weighted | ||
5619 | * according to the scheduling class and "nice" value. | ||
5620 | */ | ||
5621 | static unsigned long target_load(int cpu, int type) | ||
5622 | { | ||
5623 | struct rq *rq = cpu_rq(cpu); | ||
5624 | unsigned long total = weighted_cpuload(rq); | ||
5625 | |||
5626 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
5627 | return total; | ||
5628 | |||
5629 | return max(rq->cpu_load[type-1], total); | ||
5630 | } | ||
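source_load() and target_load() above implement the LB_BIAS behaviour that this patch removes: a migration source is judged by the lower of its instantaneous load and its decayed cpu_load[] history, a migration target by the higher, so the balancer errs on the conservative side in both directions. A stand-alone sketch with made-up numbers (illustration only, not kernel code):

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	unsigned long instantaneous = 700;	/* stands in for weighted_cpuload(rq) */
	unsigned long history = 900;		/* stands in for rq->cpu_load[type - 1] */

	printf("as migration source: %lu\n", min_ul(history, instantaneous));	/* 700 */
	printf("as migration target: %lu\n", max_ul(history, instantaneous));	/* 900 */
	return 0;
}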
5631 | |||
5632 | static unsigned long capacity_of(int cpu) | 5346 | static unsigned long capacity_of(int cpu) |
5633 | { | 5347 | { |
5634 | return cpu_rq(cpu)->cpu_capacity; | 5348 | return cpu_rq(cpu)->cpu_capacity; |
@@ -5736,7 +5450,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, | |||
5736 | s64 this_eff_load, prev_eff_load; | 5450 | s64 this_eff_load, prev_eff_load; |
5737 | unsigned long task_load; | 5451 | unsigned long task_load; |
5738 | 5452 | ||
5739 | this_eff_load = target_load(this_cpu, sd->wake_idx); | 5453 | this_eff_load = weighted_cpuload(cpu_rq(this_cpu)); |
5740 | 5454 | ||
5741 | if (sync) { | 5455 | if (sync) { |
5742 | unsigned long current_load = task_h_load(current); | 5456 | unsigned long current_load = task_h_load(current); |
@@ -5754,7 +5468,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, | |||
5754 | this_eff_load *= 100; | 5468 | this_eff_load *= 100; |
5755 | this_eff_load *= capacity_of(prev_cpu); | 5469 | this_eff_load *= capacity_of(prev_cpu); |
5756 | 5470 | ||
5757 | prev_eff_load = source_load(prev_cpu, sd->wake_idx); | 5471 | prev_eff_load = weighted_cpuload(cpu_rq(prev_cpu)); |
5758 | prev_eff_load -= task_load; | 5472 | prev_eff_load -= task_load; |
5759 | if (sched_feat(WA_BIAS)) | 5473 | if (sched_feat(WA_BIAS)) |
5760 | prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; | 5474 | prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; |
@@ -5815,14 +5529,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
5815 | unsigned long this_runnable_load = ULONG_MAX; | 5529 | unsigned long this_runnable_load = ULONG_MAX; |
5816 | unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; | 5530 | unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; |
5817 | unsigned long most_spare = 0, this_spare = 0; | 5531 | unsigned long most_spare = 0, this_spare = 0; |
5818 | int load_idx = sd->forkexec_idx; | ||
5819 | int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; | 5532 | int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; |
5820 | unsigned long imbalance = scale_load_down(NICE_0_LOAD) * | 5533 | unsigned long imbalance = scale_load_down(NICE_0_LOAD) * |
5821 | (sd->imbalance_pct-100) / 100; | 5534 | (sd->imbalance_pct-100) / 100; |
5822 | 5535 | ||
5823 | if (sd_flag & SD_BALANCE_WAKE) | ||
5824 | load_idx = sd->wake_idx; | ||
5825 | |||
5826 | do { | 5536 | do { |
5827 | unsigned long load, avg_load, runnable_load; | 5537 | unsigned long load, avg_load, runnable_load; |
5828 | unsigned long spare_cap, max_spare_cap; | 5538 | unsigned long spare_cap, max_spare_cap; |
@@ -5831,7 +5541,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
5831 | 5541 | ||
5832 | /* Skip over this group if it has no CPUs allowed */ | 5542 | /* Skip over this group if it has no CPUs allowed */ |
5833 | if (!cpumask_intersects(sched_group_span(group), | 5543 | if (!cpumask_intersects(sched_group_span(group), |
5834 | &p->cpus_allowed)) | 5544 | p->cpus_ptr)) |
5835 | continue; | 5545 | continue; |
5836 | 5546 | ||
5837 | local_group = cpumask_test_cpu(this_cpu, | 5547 | local_group = cpumask_test_cpu(this_cpu, |
@@ -5846,12 +5556,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
5846 | max_spare_cap = 0; | 5556 | max_spare_cap = 0; |
5847 | 5557 | ||
5848 | for_each_cpu(i, sched_group_span(group)) { | 5558 | for_each_cpu(i, sched_group_span(group)) { |
5849 | /* Bias balancing toward CPUs of our domain */ | 5559 | load = weighted_cpuload(cpu_rq(i)); |
5850 | if (local_group) | ||
5851 | load = source_load(i, load_idx); | ||
5852 | else | ||
5853 | load = target_load(i, load_idx); | ||
5854 | |||
5855 | runnable_load += load; | 5560 | runnable_load += load; |
5856 | 5561 | ||
5857 | avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); | 5562 | avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); |
@@ -5963,7 +5668,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this | |||
5963 | return cpumask_first(sched_group_span(group)); | 5668 | return cpumask_first(sched_group_span(group)); |
5964 | 5669 | ||
5965 | /* Traverse only the allowed CPUs */ | 5670 | /* Traverse only the allowed CPUs */ |
5966 | for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { | 5671 | for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { |
5967 | if (available_idle_cpu(i)) { | 5672 | if (available_idle_cpu(i)) { |
5968 | struct rq *rq = cpu_rq(i); | 5673 | struct rq *rq = cpu_rq(i); |
5969 | struct cpuidle_state *idle = idle_get_state(rq); | 5674 | struct cpuidle_state *idle = idle_get_state(rq); |
@@ -6003,7 +5708,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p | |||
6003 | { | 5708 | { |
6004 | int new_cpu = cpu; | 5709 | int new_cpu = cpu; |
6005 | 5710 | ||
6006 | if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) | 5711 | if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) |
6007 | return prev_cpu; | 5712 | return prev_cpu; |
6008 | 5713 | ||
6009 | /* | 5714 | /* |
@@ -6120,7 +5825,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int | |||
6120 | if (!test_idle_cores(target, false)) | 5825 | if (!test_idle_cores(target, false)) |
6121 | return -1; | 5826 | return -1; |
6122 | 5827 | ||
6123 | cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); | 5828 | cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); |
6124 | 5829 | ||
6125 | for_each_cpu_wrap(core, cpus, target) { | 5830 | for_each_cpu_wrap(core, cpus, target) { |
6126 | bool idle = true; | 5831 | bool idle = true; |
@@ -6154,7 +5859,7 @@ static int select_idle_smt(struct task_struct *p, int target) | |||
6154 | return -1; | 5859 | return -1; |
6155 | 5860 | ||
6156 | for_each_cpu(cpu, cpu_smt_mask(target)) { | 5861 | for_each_cpu(cpu, cpu_smt_mask(target)) { |
6157 | if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | 5862 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
6158 | continue; | 5863 | continue; |
6159 | if (available_idle_cpu(cpu)) | 5864 | if (available_idle_cpu(cpu)) |
6160 | return cpu; | 5865 | return cpu; |
@@ -6217,7 +5922,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t | |||
6217 | for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { | 5922 | for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { |
6218 | if (!--nr) | 5923 | if (!--nr) |
6219 | return -1; | 5924 | return -1; |
6220 | if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | 5925 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
6221 | continue; | 5926 | continue; |
6222 | if (available_idle_cpu(cpu)) | 5927 | if (available_idle_cpu(cpu)) |
6223 | break; | 5928 | break; |
@@ -6254,7 +5959,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) | |||
6254 | recent_used_cpu != target && | 5959 | recent_used_cpu != target && |
6255 | cpus_share_cache(recent_used_cpu, target) && | 5960 | cpus_share_cache(recent_used_cpu, target) && |
6256 | available_idle_cpu(recent_used_cpu) && | 5961 | available_idle_cpu(recent_used_cpu) && |
6257 | cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { | 5962 | cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { |
6258 | /* | 5963 | /* |
6259 | * Replace recent_used_cpu with prev as it is a potential | 5964 | * Replace recent_used_cpu with prev as it is a potential |
6260 | * candidate for the next wake: | 5965 | * candidate for the next wake: |
@@ -6600,7 +6305,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) | |||
6600 | int max_spare_cap_cpu = -1; | 6305 | int max_spare_cap_cpu = -1; |
6601 | 6306 | ||
6602 | for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { | 6307 | for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { |
6603 | if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | 6308 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) |
6604 | continue; | 6309 | continue; |
6605 | 6310 | ||
6606 | /* Skip CPUs that will be overutilized. */ | 6311 | /* Skip CPUs that will be overutilized. */ |
@@ -6689,7 +6394,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f | |||
6689 | } | 6394 | } |
6690 | 6395 | ||
6691 | want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && | 6396 | want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && |
6692 | cpumask_test_cpu(cpu, &p->cpus_allowed); | 6397 | cpumask_test_cpu(cpu, p->cpus_ptr); |
6693 | } | 6398 | } |
6694 | 6399 | ||
6695 | rcu_read_lock(); | 6400 | rcu_read_lock(); |
@@ -7445,14 +7150,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) | |||
7445 | /* | 7150 | /* |
7446 | * We do not migrate tasks that are: | 7151 | * We do not migrate tasks that are: |
7447 | * 1) throttled_lb_pair, or | 7152 | * 1) throttled_lb_pair, or |
7448 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 7153 | * 2) cannot be migrated to this CPU due to cpus_ptr, or |
7449 | * 3) running (obviously), or | 7154 | * 3) running (obviously), or |
7450 | * 4) are cache-hot on their current CPU. | 7155 | * 4) are cache-hot on their current CPU. |
7451 | */ | 7156 | */ |
7452 | if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) | 7157 | if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) |
7453 | return 0; | 7158 | return 0; |
7454 | 7159 | ||
7455 | if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { | 7160 | if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { |
7456 | int cpu; | 7161 | int cpu; |
7457 | 7162 | ||
7458 | schedstat_inc(p->se.statistics.nr_failed_migrations_affine); | 7163 | schedstat_inc(p->se.statistics.nr_failed_migrations_affine); |
@@ -7472,7 +7177,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) | |||
7472 | 7177 | ||
7473 | /* Prevent re-selecting dst_cpu via env's CPUs: */ | 7178 | /* Prevent re-selecting dst_cpu via env's CPUs: */ |
7474 | for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { | 7179 | for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { |
7475 | if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { | 7180 | if (cpumask_test_cpu(cpu, p->cpus_ptr)) { |
7476 | env->flags |= LBF_DST_PINNED; | 7181 | env->flags |= LBF_DST_PINNED; |
7477 | env->new_dst_cpu = cpu; | 7182 | env->new_dst_cpu = cpu; |
7478 | break; | 7183 | break; |
@@ -7695,6 +7400,7 @@ static void attach_tasks(struct lb_env *env) | |||
7695 | rq_unlock(env->dst_rq, &rf); | 7400 | rq_unlock(env->dst_rq, &rf); |
7696 | } | 7401 | } |
7697 | 7402 | ||
7403 | #ifdef CONFIG_NO_HZ_COMMON | ||
7698 | static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) | 7404 | static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) |
7699 | { | 7405 | { |
7700 | if (cfs_rq->avg.load_avg) | 7406 | if (cfs_rq->avg.load_avg) |
@@ -7722,6 +7428,19 @@ static inline bool others_have_blocked(struct rq *rq) | |||
7722 | return false; | 7428 | return false; |
7723 | } | 7429 | } |
7724 | 7430 | ||
7431 | static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) | ||
7432 | { | ||
7433 | rq->last_blocked_load_update_tick = jiffies; | ||
7434 | |||
7435 | if (!has_blocked) | ||
7436 | rq->has_blocked_load = 0; | ||
7437 | } | ||
7438 | #else | ||
7439 | static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) { return false; } | ||
7440 | static inline bool others_have_blocked(struct rq *rq) { return false; } | ||
7441 | static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {} | ||
7442 | #endif | ||
7443 | |||
7725 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7444 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7726 | 7445 | ||
7727 | static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) | 7446 | static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) |
@@ -7787,11 +7506,7 @@ static void update_blocked_averages(int cpu) | |||
7787 | if (others_have_blocked(rq)) | 7506 | if (others_have_blocked(rq)) |
7788 | done = false; | 7507 | done = false; |
7789 | 7508 | ||
7790 | #ifdef CONFIG_NO_HZ_COMMON | 7509 | update_blocked_load_status(rq, !done); |
7791 | rq->last_blocked_load_update_tick = jiffies; | ||
7792 | if (done) | ||
7793 | rq->has_blocked_load = 0; | ||
7794 | #endif | ||
7795 | rq_unlock_irqrestore(rq, &rf); | 7510 | rq_unlock_irqrestore(rq, &rf); |
7796 | } | 7511 | } |
7797 | 7512 | ||
@@ -7857,11 +7572,7 @@ static inline void update_blocked_averages(int cpu) | |||
7857 | update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); | 7572 | update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); |
7858 | update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); | 7573 | update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); |
7859 | update_irq_load_avg(rq, 0); | 7574 | update_irq_load_avg(rq, 0); |
7860 | #ifdef CONFIG_NO_HZ_COMMON | 7575 | update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq)); |
7861 | rq->last_blocked_load_update_tick = jiffies; | ||
7862 | if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq)) | ||
7863 | rq->has_blocked_load = 0; | ||
7864 | #endif | ||
7865 | rq_unlock_irqrestore(rq, &rf); | 7576 | rq_unlock_irqrestore(rq, &rf); |
7866 | } | 7577 | } |
7867 | 7578 | ||
@@ -7879,7 +7590,6 @@ static unsigned long task_h_load(struct task_struct *p) | |||
7879 | struct sg_lb_stats { | 7590 | struct sg_lb_stats { |
7880 | unsigned long avg_load; /* Avg load across the CPUs of the group */ | 7591 | unsigned long avg_load; /* Avg load across the CPUs of the group */ |
7881 | unsigned long group_load; /* Total load over the CPUs of the group */ | 7592 | unsigned long group_load; /* Total load over the CPUs of the group */ |
7882 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | ||
7883 | unsigned long load_per_task; | 7593 | unsigned long load_per_task; |
7884 | unsigned long group_capacity; | 7594 | unsigned long group_capacity; |
7885 | unsigned long group_util; /* Total utilization of the group */ | 7595 | unsigned long group_util; /* Total utilization of the group */ |
@@ -7933,34 +7643,6 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) | |||
7933 | }; | 7643 | }; |
7934 | } | 7644 | } |
7935 | 7645 | ||
7936 | /** | ||
7937 | * get_sd_load_idx - Obtain the load index for a given sched domain. | ||
7938 | * @sd: The sched_domain whose load_idx is to be obtained. | ||
7939 | * @idle: The idle status of the CPU for whose sd load_idx is obtained. | ||
7940 | * | ||
7941 | * Return: The load index. | ||
7942 | */ | ||
7943 | static inline int get_sd_load_idx(struct sched_domain *sd, | ||
7944 | enum cpu_idle_type idle) | ||
7945 | { | ||
7946 | int load_idx; | ||
7947 | |||
7948 | switch (idle) { | ||
7949 | case CPU_NOT_IDLE: | ||
7950 | load_idx = sd->busy_idx; | ||
7951 | break; | ||
7952 | |||
7953 | case CPU_NEWLY_IDLE: | ||
7954 | load_idx = sd->newidle_idx; | ||
7955 | break; | ||
7956 | default: | ||
7957 | load_idx = sd->idle_idx; | ||
7958 | break; | ||
7959 | } | ||
7960 | |||
7961 | return load_idx; | ||
7962 | } | ||
7963 | |||
7964 | static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) | 7646 | static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) |
7965 | { | 7647 | { |
7966 | struct rq *rq = cpu_rq(cpu); | 7648 | struct rq *rq = cpu_rq(cpu); |
@@ -8099,7 +7781,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) | |||
8099 | 7781 | ||
8100 | /* | 7782 | /* |
8101 | * Group imbalance indicates (and tries to solve) the problem where balancing | 7783 | * Group imbalance indicates (and tries to solve) the problem where balancing |
8102 | * groups is inadequate due to ->cpus_allowed constraints. | 7784 | * groups is inadequate due to ->cpus_ptr constraints. |
8103 | * | 7785 | * |
8104 | * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a | 7786 | * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a |
8105 | * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. | 7787 | * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. |
@@ -8249,9 +7931,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
8249 | struct sg_lb_stats *sgs, | 7931 | struct sg_lb_stats *sgs, |
8250 | int *sg_status) | 7932 | int *sg_status) |
8251 | { | 7933 | { |
8252 | int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group)); | ||
8253 | int load_idx = get_sd_load_idx(env->sd, env->idle); | ||
8254 | unsigned long load; | ||
8255 | int i, nr_running; | 7934 | int i, nr_running; |
8256 | 7935 | ||
8257 | memset(sgs, 0, sizeof(*sgs)); | 7936 | memset(sgs, 0, sizeof(*sgs)); |
@@ -8262,13 +7941,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
8262 | if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) | 7941 | if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) |
8263 | env->flags |= LBF_NOHZ_AGAIN; | 7942 | env->flags |= LBF_NOHZ_AGAIN; |
8264 | 7943 | ||
8265 | /* Bias balancing toward CPUs of our domain: */ | 7944 | sgs->group_load += weighted_cpuload(rq); |
8266 | if (local_group) | ||
8267 | load = target_load(i, load_idx); | ||
8268 | else | ||
8269 | load = source_load(i, load_idx); | ||
8270 | |||
8271 | sgs->group_load += load; | ||
8272 | sgs->group_util += cpu_util(i); | 7945 | sgs->group_util += cpu_util(i); |
8273 | sgs->sum_nr_running += rq->cfs.h_nr_running; | 7946 | sgs->sum_nr_running += rq->cfs.h_nr_running; |
8274 | 7947 | ||
@@ -8283,7 +7956,6 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
8283 | sgs->nr_numa_running += rq->nr_numa_running; | 7956 | sgs->nr_numa_running += rq->nr_numa_running; |
8284 | sgs->nr_preferred_running += rq->nr_preferred_running; | 7957 | sgs->nr_preferred_running += rq->nr_preferred_running; |
8285 | #endif | 7958 | #endif |
8286 | sgs->sum_weighted_load += weighted_cpuload(rq); | ||
8287 | /* | 7959 | /* |
8288 | * No need to call idle_cpu() if nr_running is not 0 | 7960 | * No need to call idle_cpu() if nr_running is not 0 |
8289 | */ | 7961 | */ |
@@ -8302,7 +7974,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
8302 | sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; | 7974 | sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; |
8303 | 7975 | ||
8304 | if (sgs->sum_nr_running) | 7976 | if (sgs->sum_nr_running) |
8305 | sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 7977 | sgs->load_per_task = sgs->group_load / sgs->sum_nr_running; |
8306 | 7978 | ||
8307 | sgs->group_weight = group->group_weight; | 7979 | sgs->group_weight = group->group_weight; |
8308 | 7980 | ||
@@ -8768,7 +8440,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) | |||
8768 | /* | 8440 | /* |
8769 | * If the busiest group is imbalanced the below checks don't | 8441 | * If the busiest group is imbalanced the below checks don't |
8770 | * work because they assume all things are equal, which typically | 8442 | * work because they assume all things are equal, which typically |
8771 | * isn't true due to cpus_allowed constraints and the like. | 8443 | * isn't true due to cpus_ptr constraints and the like. |
8772 | */ | 8444 | */ |
8773 | if (busiest->group_type == group_imbalanced) | 8445 | if (busiest->group_type == group_imbalanced) |
8774 | goto force_balance; | 8446 | goto force_balance; |
@@ -9210,7 +8882,7 @@ more_balance: | |||
9210 | * if the curr task on busiest CPU can't be | 8882 | * if the curr task on busiest CPU can't be |
9211 | * moved to this_cpu: | 8883 | * moved to this_cpu: |
9212 | */ | 8884 | */ |
9213 | if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { | 8885 | if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) { |
9214 | raw_spin_unlock_irqrestore(&busiest->lock, | 8886 | raw_spin_unlock_irqrestore(&busiest->lock, |
9215 | flags); | 8887 | flags); |
9216 | env.flags |= LBF_ALL_PINNED; | 8888 | env.flags |= LBF_ALL_PINNED; |
@@ -9879,7 +9551,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags, | |||
9879 | 9551 | ||
9880 | rq_lock_irqsave(rq, &rf); | 9552 | rq_lock_irqsave(rq, &rf); |
9881 | update_rq_clock(rq); | 9553 | update_rq_clock(rq); |
9882 | cpu_load_update_idle(rq); | ||
9883 | rq_unlock_irqrestore(rq, &rf); | 9554 | rq_unlock_irqrestore(rq, &rf); |
9884 | 9555 | ||
9885 | if (flags & NOHZ_BALANCE_KICK) | 9556 | if (flags & NOHZ_BALANCE_KICK) |
diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 858589b83377..2410db5e9a35 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h | |||
@@ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true) | |||
39 | 39 | ||
40 | SCHED_FEAT(HRTICK, false) | 40 | SCHED_FEAT(HRTICK, false) |
41 | SCHED_FEAT(DOUBLE_TICK, false) | 41 | SCHED_FEAT(DOUBLE_TICK, false) |
42 | SCHED_FEAT(LB_BIAS, false) | ||
43 | 42 | ||
44 | /* | 43 | /* |
45 | * Decrement CPU capacity based on time not spent running tasks | 44 | * Decrement CPU capacity based on time not spent running tasks |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 1e6b909dca36..63ad7c90822c 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1614 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 1614 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
1615 | { | 1615 | { |
1616 | if (!task_running(rq, p) && | 1616 | if (!task_running(rq, p) && |
1617 | cpumask_test_cpu(cpu, &p->cpus_allowed)) | 1617 | cpumask_test_cpu(cpu, p->cpus_ptr)) |
1618 | return 1; | 1618 | return 1; |
1619 | 1619 | ||
1620 | return 0; | 1620 | return 0; |
@@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
1751 | * Also make sure that it wasn't scheduled on its rq. | 1751 | * Also make sure that it wasn't scheduled on its rq. |
1752 | */ | 1752 | */ |
1753 | if (unlikely(task_rq(task) != rq || | 1753 | if (unlikely(task_rq(task) != rq || |
1754 | !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || | 1754 | !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || |
1755 | task_running(rq, task) || | 1755 | task_running(rq, task) || |
1756 | !rt_task(task) || | 1756 | !rt_task(task) || |
1757 | !task_on_rq_queued(task))) { | 1757 | !task_on_rq_queued(task))) { |
diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h index a26473674fb7..c529706bed11 100644 --- a/kernel/sched/sched-pelt.h +++ b/kernel/sched/sched-pelt.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* Generated by Documentation/scheduler/sched-pelt; do not modify. */ | 2 | /* Generated by Documentation/scheduler/sched-pelt; do not modify. */ |
3 | 3 | ||
4 | static const u32 runnable_avg_yN_inv[] = { | 4 | static const u32 runnable_avg_yN_inv[] __maybe_unused = { |
5 | 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, | 5 | 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, |
6 | 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, | 6 | 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, |
7 | 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, | 7 | 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b52ed1ada0be..b08dee29ef5e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks; | |||
96 | extern void calc_global_load_tick(struct rq *this_rq); | 96 | extern void calc_global_load_tick(struct rq *this_rq); |
97 | extern long calc_load_fold_active(struct rq *this_rq, long adjust); | 97 | extern long calc_load_fold_active(struct rq *this_rq, long adjust); |
98 | 98 | ||
99 | #ifdef CONFIG_SMP | ||
100 | extern void cpu_load_update_active(struct rq *this_rq); | ||
101 | #else | ||
102 | static inline void cpu_load_update_active(struct rq *this_rq) { } | ||
103 | #endif | ||
104 | |||
105 | /* | 99 | /* |
106 | * Helpers for converting nanosecond timing to jiffy resolution | 100 | * Helpers for converting nanosecond timing to jiffy resolution |
107 | */ | 101 | */ |
@@ -344,8 +338,10 @@ struct cfs_bandwidth { | |||
344 | u64 runtime_expires; | 338 | u64 runtime_expires; |
345 | int expires_seq; | 339 | int expires_seq; |
346 | 340 | ||
347 | short idle; | 341 | u8 idle; |
348 | short period_active; | 342 | u8 period_active; |
343 | u8 distribute_running; | ||
344 | u8 slack_started; | ||
349 | struct hrtimer period_timer; | 345 | struct hrtimer period_timer; |
350 | struct hrtimer slack_timer; | 346 | struct hrtimer slack_timer; |
351 | struct list_head throttled_cfs_rq; | 347 | struct list_head throttled_cfs_rq; |
@@ -354,8 +350,6 @@ struct cfs_bandwidth { | |||
354 | int nr_periods; | 350 | int nr_periods; |
355 | int nr_throttled; | 351 | int nr_throttled; |
356 | u64 throttled_time; | 352 | u64 throttled_time; |
357 | |||
358 | bool distribute_running; | ||
359 | #endif | 353 | #endif |
360 | }; | 354 | }; |
361 | 355 | ||
@@ -818,8 +812,6 @@ struct rq { | |||
818 | unsigned int nr_preferred_running; | 812 | unsigned int nr_preferred_running; |
819 | unsigned int numa_migrate_on; | 813 | unsigned int numa_migrate_on; |
820 | #endif | 814 | #endif |
821 | #define CPU_LOAD_IDX_MAX 5 | ||
822 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | ||
823 | #ifdef CONFIG_NO_HZ_COMMON | 815 | #ifdef CONFIG_NO_HZ_COMMON |
824 | #ifdef CONFIG_SMP | 816 | #ifdef CONFIG_SMP |
825 | unsigned long last_load_update_tick; | 817 | unsigned long last_load_update_tick; |
@@ -830,8 +822,6 @@ struct rq { | |||
830 | atomic_t nohz_flags; | 822 | atomic_t nohz_flags; |
831 | #endif /* CONFIG_NO_HZ_COMMON */ | 823 | #endif /* CONFIG_NO_HZ_COMMON */ |
832 | 824 | ||
833 | /* capture load from *all* tasks on this CPU: */ | ||
834 | struct load_weight load; | ||
835 | unsigned long nr_load_updates; | 825 | unsigned long nr_load_updates; |
836 | u64 nr_switches; | 826 | u64 nr_switches; |
837 | 827 | ||
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index f53f89df837d..63184cf0d0d7 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c | |||
@@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl, | |||
1344 | .imbalance_pct = 125, | 1344 | .imbalance_pct = 125, |
1345 | 1345 | ||
1346 | .cache_nice_tries = 0, | 1346 | .cache_nice_tries = 0, |
1347 | .busy_idx = 0, | ||
1348 | .idle_idx = 0, | ||
1349 | .newidle_idx = 0, | ||
1350 | .wake_idx = 0, | ||
1351 | .forkexec_idx = 0, | ||
1352 | 1347 | ||
1353 | .flags = 1*SD_LOAD_BALANCE | 1348 | .flags = 1*SD_LOAD_BALANCE |
1354 | | 1*SD_BALANCE_NEWIDLE | 1349 | | 1*SD_BALANCE_NEWIDLE |
@@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl, | |||
1400 | } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { | 1395 | } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { |
1401 | sd->imbalance_pct = 117; | 1396 | sd->imbalance_pct = 117; |
1402 | sd->cache_nice_tries = 1; | 1397 | sd->cache_nice_tries = 1; |
1403 | sd->busy_idx = 2; | ||
1404 | 1398 | ||
1405 | #ifdef CONFIG_NUMA | 1399 | #ifdef CONFIG_NUMA |
1406 | } else if (sd->flags & SD_NUMA) { | 1400 | } else if (sd->flags & SD_NUMA) { |
1407 | sd->cache_nice_tries = 2; | 1401 | sd->cache_nice_tries = 2; |
1408 | sd->busy_idx = 3; | ||
1409 | sd->idle_idx = 2; | ||
1410 | 1402 | ||
1411 | sd->flags &= ~SD_PREFER_SIBLING; | 1403 | sd->flags &= ~SD_PREFER_SIBLING; |
1412 | sd->flags |= SD_SERIALIZE; | 1404 | sd->flags |= SD_SERIALIZE; |
@@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl, | |||
1419 | #endif | 1411 | #endif |
1420 | } else { | 1412 | } else { |
1421 | sd->cache_nice_tries = 1; | 1413 | sd->cache_nice_tries = 1; |
1422 | sd->busy_idx = 2; | ||
1423 | sd->idle_idx = 1; | ||
1424 | } | 1414 | } |
1425 | 1415 | ||
1426 | /* | 1416 | /* |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4ee1a3428ae..be9707f68024 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) | |||
782 | */ | 782 | */ |
783 | if (!ts->tick_stopped) { | 783 | if (!ts->tick_stopped) { |
784 | calc_load_nohz_start(); | 784 | calc_load_nohz_start(); |
785 | cpu_load_update_nohz_start(); | ||
786 | quiet_vmstat(); | 785 | quiet_vmstat(); |
787 | 786 | ||
788 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); | 787 | ts->last_tick = hrtimer_get_expires(&ts->sched_timer); |
@@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | |||
829 | { | 828 | { |
830 | /* Update jiffies first */ | 829 | /* Update jiffies first */ |
831 | tick_do_update_jiffies64(now); | 830 | tick_do_update_jiffies64(now); |
832 | cpu_load_update_nohz_stop(); | ||
833 | 831 | ||
834 | /* | 832 | /* |
835 | * Clear the timer idle flag, so we avoid IPIs on remote queueing and | 833 | * Clear the timer idle flag, so we avoid IPIs on remote queueing and |
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 1e6db9cbe4dc..fa95139445b2 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c | |||
@@ -277,7 +277,7 @@ static void move_to_next_cpu(void) | |||
277 | * of this thread, then stop migrating for the duration | 277 | * of this thread, then stop migrating for the duration |
278 | * of the current test. | 278 | * of the current test. |
279 | */ | 279 | */ |
280 | if (!cpumask_equal(current_mask, ¤t->cpus_allowed)) | 280 | if (!cpumask_equal(current_mask, current->cpus_ptr)) |
281 | goto disable; | 281 | goto disable; |
282 | 282 | ||
283 | get_online_cpus(); | 283 | get_online_cpus(); |
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 157d9e31f6c2..60ba93fc42ce 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c | |||
@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) | |||
23 | * Kernel threads bound to a single CPU can safely use | 23 | * Kernel threads bound to a single CPU can safely use |
24 | * smp_processor_id(): | 24 | * smp_processor_id(): |
25 | */ | 25 | */ |
26 | if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) | 26 | if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) |
27 | goto out; | 27 | goto out; |
28 | 28 | ||
29 | /* | 29 | /* |
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 1da597aa6141..1a72b7d95cdc 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c | |||
@@ -34,7 +34,7 @@ static void simple_thread_func(int cnt) | |||
34 | 34 | ||
35 | /* Silly tracepoints */ | 35 | /* Silly tracepoints */ |
36 | trace_foo_bar("hello", cnt, array, random_strings[len], | 36 | trace_foo_bar("hello", cnt, array, random_strings[len], |
37 | ¤t->cpus_allowed); | 37 | current->cpus_ptr); |
38 | 38 | ||
39 | trace_foo_with_template_simple("HELLO", cnt); | 39 | trace_foo_with_template_simple("HELLO", cnt); |
40 | 40 | ||