-rw-r--r--  Documentation/scheduler/sched-pelt.c | 3
-rw-r--r--  arch/ia64/kernel/mca.c | 2
-rw-r--r--  arch/mips/include/asm/switch_to.h | 4
-rw-r--r--  arch/mips/kernel/mips-mt-fpaff.c | 2
-rw-r--r--  arch/mips/kernel/traps.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 2
-rw-r--r--  arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 7
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  include/linux/sched.h | 5
-rw-r--r--  include/linux/sched/nohz.h | 8
-rw-r--r--  include/linux/sched/topology.h | 5
-rw-r--r--  init/init_task.c | 3
-rw-r--r--  kernel/cgroup/cpuset.c | 2
-rw-r--r--  kernel/fork.c | 2
-rw-r--r--  kernel/sched/core.c | 83
-rw-r--r--  kernel/sched/cpudeadline.c | 4
-rw-r--r--  kernel/sched/cpupri.c | 4
-rw-r--r--  kernel/sched/deadline.c | 6
-rw-r--r--  kernel/sched/debug.c | 43
-rw-r--r--  kernel/sched/fair.c | 423
-rw-r--r--  kernel/sched/features.h | 1
-rw-r--r--  kernel/sched/rt.c | 4
-rw-r--r--  kernel/sched/sched-pelt.h | 2
-rw-r--r--  kernel/sched/sched.h | 18
-rw-r--r--  kernel/sched/topology.c | 10
-rw-r--r--  kernel/time/tick-sched.c | 2
-rw-r--r--  kernel/trace/trace_hwlat.c | 2
-rw-r--r--  lib/smp_processor_id.c | 2
-rw-r--r--  samples/trace_events/trace-events-sample.c | 2
32 files changed, 154 insertions, 518 deletions
diff --git a/Documentation/scheduler/sched-pelt.c b/Documentation/scheduler/sched-pelt.c
index e4219139386a..7238b355919c 100644
--- a/Documentation/scheduler/sched-pelt.c
+++ b/Documentation/scheduler/sched-pelt.c
@@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
20 int i; 20 int i;
21 unsigned int x; 21 unsigned int x;
22 22
23 printf("static const u32 runnable_avg_yN_inv[] = {"); 23 /* To silence -Wunused-but-set-variable warnings. */
24 printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
24 for (i = 0; i < HALFLIFE; i++) { 25 for (i = 0; i < HALFLIFE; i++) {
25 x = ((1UL<<32)-1)*pow(y, i); 26 x = ((1UL<<32)-1)*pow(y, i);
26 27
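[Editor's note] The attribute is emitted into the generated table itself, so it lands in whatever header the script produces. A rough sketch of the output (assuming the usual HALFLIFE of 32; only the first, trivially-known entry is shown, the rest are elided):

static const u32 runnable_avg_yN_inv[] __maybe_unused = {
	0xffffffff,
	/* ... remaining HALFLIFE-1 precomputed entries ... */
};

The 2-line change to kernel/sched/sched-pelt.h listed in the diffstat is presumably the regenerated header picking up the same annotation.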
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d761854b..79190d877fa7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
1831 ti->cpu = cpu; 1831 ti->cpu = cpu;
1832 p->stack = ti; 1832 p->stack = ti;
1833 p->state = TASK_UNINTERRUPTIBLE; 1833 p->state = TASK_UNINTERRUPTIBLE;
1834 cpumask_set_cpu(cpu, &p->cpus_allowed); 1834 cpumask_set_cpu(cpu, &p->cpus_mask);
1835 INIT_LIST_HEAD(&p->tasks); 1835 INIT_LIST_HEAD(&p->tasks);
1836 p->parent = p->real_parent = p->group_leader = p; 1836 p->parent = p->real_parent = p->group_leader = p;
1837 INIT_LIST_HEAD(&p->children); 1837 INIT_LIST_HEAD(&p->children);
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0f813bb753c6..09cbe9042828 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
42 * inline to try to keep the overhead down. If we have been forced to run on 42 * inline to try to keep the overhead down. If we have been forced to run on
43 * a "CPU" with an FPU because of a previous high level of FP computation, 43 * a "CPU" with an FPU because of a previous high level of FP computation,
44 * but did not actually use the FPU during the most recent time-slice (CU1 44 * but did not actually use the FPU during the most recent time-slice (CU1
45 * isn't set), we undo the restriction on cpus_allowed. 45 * isn't set), we undo the restriction on cpus_mask.
46 * 46 *
47 * We're not calling set_cpus_allowed() here, because we have no need to 47 * We're not calling set_cpus_allowed() here, because we have no need to
48 * force prompt migration - we're already switching the current CPU to a 48 * force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
57 test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \ 57 test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
58 (!(KSTK_STATUS(prev) & ST0_CU1))) { \ 58 (!(KSTK_STATUS(prev) & ST0_CU1))) { \
59 clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \ 59 clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
60 prev->cpus_allowed = prev->thread.user_cpus_allowed; \ 60 prev->cpus_mask = prev->thread.user_cpus_allowed; \
61 } \ 61 } \
62 next->thread.emulated_fp = 0; \ 62 next->thread.emulated_fp = 0; \
63} while(0) 63} while(0)
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index a7c0f97e4b0d..1a08428eedcf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
177 if (retval) 177 if (retval)
178 goto out_unlock; 178 goto out_unlock;
179 179
180 cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); 180 cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
181 cpumask_and(&mask, &allowed, cpu_active_mask); 181 cpumask_and(&mask, &allowed, cpu_active_mask);
182 182
183out_unlock: 183out_unlock:
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c52766a5b85f..ac7159263da0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
891 * restricted the allowed set to exclude any CPUs with FPUs, 891 * restricted the allowed set to exclude any CPUs with FPUs,
892 * we'll skip the procedure. 892 * we'll skip the procedure.
893 */ 893 */
894 if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) { 894 if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
895 cpumask_t tmask; 895 cpumask_t tmask;
896 896
897 current->thread.user_cpus_allowed 897 current->thread.user_cpus_allowed
898 = current->cpus_allowed; 898 = current->cpus_mask;
899 cpumask_and(&tmask, &current->cpus_allowed, 899 cpumask_and(&tmask, &current->cpus_mask,
900 &mt_fpu_cpumask); 900 &mt_fpu_cpumask);
901 set_cpus_allowed_ptr(current, &tmask); 901 set_cpus_allowed_ptr(current, &tmask);
902 set_thread_flag(TIF_FPUBOUND); 902 set_thread_flag(TIF_FPUBOUND);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e56b553de27b..f18d5067cd0f 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
128 * runqueue. The context will be rescheduled on the proper node 128 * runqueue. The context will be rescheduled on the proper node
129 * if it is timesliced or preempted. 129 * if it is timesliced or preempted.
130 */ 130 */
131 cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed); 131 cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
132 132
133 /* Save the current cpu id for spu interrupt routing. */ 133 /* Save the current cpu id for spu interrupt routing. */
134 ctx->last_ran = raw_smp_processor_id(); 134 ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 604c0e3bcc83..f68baccc69f0 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1503,7 +1503,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
1503 * may be scheduled elsewhere and invalidate entries in the 1503 * may be scheduled elsewhere and invalidate entries in the
1504 * pseudo-locked region. 1504 * pseudo-locked region.
1505 */ 1505 */
1506 if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) { 1506 if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
1507 mutex_unlock(&rdtgroup_mutex); 1507 mutex_unlock(&rdtgroup_mutex);
1508 return -EINVAL; 1508 return -EINVAL;
1509 } 1509 }
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4fe662c3bbc1..c142b23bb401 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node)
1038 struct hfi1_affinity_node *entry; 1038 struct hfi1_affinity_node *entry;
1039 cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; 1039 cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
1040 const struct cpumask *node_mask, 1040 const struct cpumask *node_mask,
1041 *proc_mask = &current->cpus_allowed; 1041 *proc_mask = current->cpus_ptr;
1042 struct hfi1_affinity_node_list *affinity = &node_affinity; 1042 struct hfi1_affinity_node_list *affinity = &node_affinity;
1043 struct cpu_mask_set *set = &affinity->proc; 1043 struct cpu_mask_set *set = &affinity->proc;
1044 1044
@@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node)
1046 * check whether process/context affinity has already 1046 * check whether process/context affinity has already
1047 * been set 1047 * been set
1048 */ 1048 */
1049 if (cpumask_weight(proc_mask) == 1) { 1049 if (current->nr_cpus_allowed == 1) {
1050 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", 1050 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
1051 current->pid, current->comm, 1051 current->pid, current->comm,
1052 cpumask_pr_args(proc_mask)); 1052 cpumask_pr_args(proc_mask));
@@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node)
1057 cpu = cpumask_first(proc_mask); 1057 cpu = cpumask_first(proc_mask);
1058 cpumask_set_cpu(cpu, &set->used); 1058 cpumask_set_cpu(cpu, &set->used);
1059 goto done; 1059 goto done;
1060 } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { 1060 } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
1061 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", 1061 hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
1062 current->pid, current->comm, 1062 current->pid, current->comm,
1063 cpumask_pr_args(proc_mask)); 1063 cpumask_pr_args(proc_mask));
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 28b66bd70b74..2395fd4233a7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -869,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
869{ 869{
870 struct sdma_rht_node *rht_node; 870 struct sdma_rht_node *rht_node;
871 struct sdma_engine *sde = NULL; 871 struct sdma_engine *sde = NULL;
872 const struct cpumask *current_mask = &current->cpus_allowed;
873 unsigned long cpu_id; 872 unsigned long cpu_id;
874 873
875 /* 874 /*
876 * To ensure that always the same sdma engine(s) will be 875 * To ensure that always the same sdma engine(s) will be
877 * selected make sure the process is pinned to this CPU only. 876 * selected make sure the process is pinned to this CPU only.
878 */ 877 */
879 if (cpumask_weight(current_mask) != 1) 878 if (current->nr_cpus_allowed != 1)
880 goto out; 879 goto out;
881 880
882 cpu_id = smp_processor_id(); 881 cpu_id = smp_processor_id();
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 78fa634de98a..27b6e664e59d 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
1142static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) 1142static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
1143{ 1143{
1144 struct qib_filedata *fd = fp->private_data; 1144 struct qib_filedata *fd = fp->private_data;
1145 const unsigned int weight = cpumask_weight(&current->cpus_allowed); 1145 const unsigned int weight = current->nr_cpus_allowed;
1146 const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); 1146 const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
1147 int local_cpu; 1147 int local_cpu;
1148 1148
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
1623 ret = find_free_ctxt(i_minor - 1, fp, uinfo); 1623 ret = find_free_ctxt(i_minor - 1, fp, uinfo);
1624 else { 1624 else {
1625 int unit; 1625 int unit;
1626 const unsigned int cpu = cpumask_first(&current->cpus_allowed); 1626 const unsigned int cpu = cpumask_first(current->cpus_ptr);
1627 const unsigned int weight = 1627 const unsigned int weight = current->nr_cpus_allowed;
1628 cpumask_weight(&current->cpus_allowed);
1629 1628
1630 if (weight == 1 && !test_bit(cpu, qib_cpulist)) 1629 if (weight == 1 && !test_bit(cpu, qib_cpulist))
1631 if (!find_hca(cpu, &unit) && unit >= 0) 1630 if (!find_hca(cpu, &unit) && unit >= 0)
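[Editor's note] Both the hfi1 hunks above and these qib hunks swap cpumask_weight() on the task's affinity mask for the scheduler-maintained current->nr_cpus_allowed counter. A minimal sketch of the idea (the helper name is hypothetical, not from the patch); for the calling task the two tests are equivalent, but the counter is kept in sync by set_cpus_allowed_common(), so the driver no longer walks the bitmap on every call:

#include <linux/sched.h>

static inline bool pinned_to_single_cpu(void)
{
	/* was: cpumask_weight(&current->cpus_allowed) == 1 */
	return current->nr_cpus_allowed == 1;
}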
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2edbb657f859..84908556ea58 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
381static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 381static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
382{ 382{
383 seq_printf(m, "Cpus_allowed:\t%*pb\n", 383 seq_printf(m, "Cpus_allowed:\t%*pb\n",
384 cpumask_pr_args(&task->cpus_allowed)); 384 cpumask_pr_args(task->cpus_ptr));
385 seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", 385 seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
386 cpumask_pr_args(&task->cpus_allowed)); 386 cpumask_pr_args(task->cpus_ptr));
387} 387}
388 388
389static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) 389static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
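[Editor's note] The seq_printf() format strings are untouched, so the user-visible /proc/<pid>/status fields keep their names and layout; only the kernel-side source of the mask changes. A small userspace check, offered only as a hedged illustration (not part of the patch), that prints the two affinity lines for the current process:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return 1;
	/* matches both "Cpus_allowed:" and "Cpus_allowed_list:" */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "Cpus_allowed", 12))
			fputs(line, stdout);
	fclose(f);
	return 0;
}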
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..1b2590a8d038 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -651,7 +651,8 @@ struct task_struct {
651 651
652 unsigned int policy; 652 unsigned int policy;
653 int nr_cpus_allowed; 653 int nr_cpus_allowed;
654 cpumask_t cpus_allowed; 654 const cpumask_t *cpus_ptr;
655 cpumask_t cpus_mask;
655 656
656#ifdef CONFIG_PREEMPT_RCU 657#ifdef CONFIG_PREEMPT_RCU
657 int rcu_read_lock_nesting; 658 int rcu_read_lock_nesting;
@@ -1399,7 +1400,7 @@ extern struct pid *cad_pid;
1399#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ 1400#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1400#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ 1401#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
1401#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ 1402#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
1402#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ 1403#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
1403#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ 1404#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1404#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ 1405#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
1405#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ 1406#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
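[Editor's note] The task_struct change is the core of the series: the single cpus_allowed mask becomes cpus_mask (the storage the task owns) plus cpus_ptr (what readers dereference; it normally points at that storage but can be redirected by code that needs to pin a task temporarily). A minimal sketch of the intended pattern, with hypothetical helper names; the write side mirrors set_cpus_allowed_common() as updated in kernel/sched/core.c below:

#include <linux/sched.h>
#include <linux/cpumask.h>

static inline bool task_may_run_on(struct task_struct *p, int cpu)
{
	/* readers always go through the (possibly redirected) pointer */
	return cpumask_test_cpu(cpu, p->cpus_ptr);
}

static inline void task_write_affinity(struct task_struct *p,
				       const struct cpumask *new_mask)
{
	/* writers update the mask the task owns; cpus_ptr normally points here */
	cpumask_copy(&p->cpus_mask, new_mask);
	p->nr_cpus_allowed = cpumask_weight(new_mask);
}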
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index b36f4cf38111..1abe91ff6e4a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -7,14 +7,6 @@
7 */ 7 */
8 8
9#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 9#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
10extern void cpu_load_update_nohz_start(void);
11extern void cpu_load_update_nohz_stop(void);
12#else
13static inline void cpu_load_update_nohz_start(void) { }
14static inline void cpu_load_update_nohz_stop(void) { }
15#endif
16
17#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
18extern void nohz_balance_enter_idle(int cpu); 10extern void nohz_balance_enter_idle(int cpu);
19extern int get_nohz_timer_target(void); 11extern int get_nohz_timer_target(void);
20#else 12#else
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index cfc0a89a7159..53afbe07354a 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -84,11 +84,6 @@ struct sched_domain {
84 unsigned int busy_factor; /* less balancing by factor if busy */ 84 unsigned int busy_factor; /* less balancing by factor if busy */
85 unsigned int imbalance_pct; /* No balance until over watermark */ 85 unsigned int imbalance_pct; /* No balance until over watermark */
86 unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ 86 unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
87 unsigned int busy_idx;
88 unsigned int idle_idx;
89 unsigned int newidle_idx;
90 unsigned int wake_idx;
91 unsigned int forkexec_idx;
92 87
93 int nohz_idle; /* NOHZ IDLE status */ 88 int nohz_idle; /* NOHZ IDLE status */
94 int flags; /* See SD_* */ 89 int flags; /* See SD_* */
diff --git a/init/init_task.c b/init/init_task.c
index c70ef656d0f4..3c27c0efa316 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -72,7 +72,8 @@ struct task_struct init_task
72 .static_prio = MAX_PRIO - 20, 72 .static_prio = MAX_PRIO - 20,
73 .normal_prio = MAX_PRIO - 20, 73 .normal_prio = MAX_PRIO - 20,
74 .policy = SCHED_NORMAL, 74 .policy = SCHED_NORMAL,
75 .cpus_allowed = CPU_MASK_ALL, 75 .cpus_ptr = &init_task.cpus_mask,
76 .cpus_mask = CPU_MASK_ALL,
76 .nr_cpus_allowed= NR_CPUS, 77 .nr_cpus_allowed= NR_CPUS,
77 .mm = NULL, 78 .mm = NULL,
78 .active_mm = &init_mm, 79 .active_mm = &init_mm,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 515525ff1cfd..a1590e244f5f 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
2829 if (task_css_is_root(task, cpuset_cgrp_id)) 2829 if (task_css_is_root(task, cpuset_cgrp_id))
2830 return; 2830 return;
2831 2831
2832 set_cpus_allowed_ptr(task, &current->cpus_allowed); 2832 set_cpus_allowed_ptr(task, current->cpus_ptr);
2833 task->mems_allowed = current->mems_allowed; 2833 task->mems_allowed = current->mems_allowed;
2834} 2834}
2835 2835
diff --git a/kernel/fork.c b/kernel/fork.c
index 75675b9bf6df..6be686283e55 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -894,6 +894,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
894#ifdef CONFIG_STACKPROTECTOR 894#ifdef CONFIG_STACKPROTECTOR
895 tsk->stack_canary = get_random_canary(); 895 tsk->stack_canary = get_random_canary();
896#endif 896#endif
897 if (orig->cpus_ptr == &orig->cpus_mask)
898 tsk->cpus_ptr = &tsk->cpus_mask;
897 899
898 /* 900 /*
899 * One for us, one for whoever does the "release_task()" (usually 901 * One for us, one for whoever does the "release_task()" (usually
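[Editor's note] dup_task_struct() copies the parent task_struct wholesale, so without this hunk the child's cpus_ptr would still point at the parent's cpus_mask; the guard re-points it at the child's own mask, and skips the fixup when the parent's pointer had been redirected elsewhere. A tiny userspace toy of that aliasing problem (a hedged model; none of these names come from the kernel):

#include <stdio.h>

struct toy_task {
	unsigned long cpus_mask;
	const unsigned long *cpus_ptr;	/* normally points at our own cpus_mask */
};

int main(void)
{
	struct toy_task parent = { .cpus_mask = 0xf };
	struct toy_task child;

	parent.cpus_ptr = &parent.cpus_mask;

	child = parent;			/* struct copy, like dup_task_struct() */
	/* after the raw copy, child.cpus_ptr still aliases the parent's mask */
	if (parent.cpus_ptr == &parent.cpus_mask)
		child.cpus_ptr = &child.cpus_mask;

	child.cpus_mask = 0x3;		/* updates are now private to the child */
	printf("parent %lx child %lx\n", *parent.cpus_ptr, *child.cpus_ptr);
	return 0;
}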
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 874c427742a9..83bd6bb32a34 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -930,7 +930,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
930 */ 930 */
931static inline bool is_cpu_allowed(struct task_struct *p, int cpu) 931static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
932{ 932{
933 if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) 933 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
934 return false; 934 return false;
935 935
936 if (is_per_cpu_kthread(p)) 936 if (is_per_cpu_kthread(p))
@@ -1025,7 +1025,7 @@ static int migration_cpu_stop(void *data)
1025 local_irq_disable(); 1025 local_irq_disable();
1026 /* 1026 /*
1027 * We need to explicitly wake pending tasks before running 1027 * We need to explicitly wake pending tasks before running
1028 * __migrate_task() such that we will not miss enforcing cpus_allowed 1028 * __migrate_task() such that we will not miss enforcing cpus_ptr
1029 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. 1029 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
1030 */ 1030 */
1031 sched_ttwu_pending(); 1031 sched_ttwu_pending();
@@ -1056,7 +1056,7 @@ static int migration_cpu_stop(void *data)
1056 */ 1056 */
1057void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) 1057void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
1058{ 1058{
1059 cpumask_copy(&p->cpus_allowed, new_mask); 1059 cpumask_copy(&p->cpus_mask, new_mask);
1060 p->nr_cpus_allowed = cpumask_weight(new_mask); 1060 p->nr_cpus_allowed = cpumask_weight(new_mask);
1061} 1061}
1062 1062
@@ -1126,7 +1126,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
1126 goto out; 1126 goto out;
1127 } 1127 }
1128 1128
1129 if (cpumask_equal(&p->cpus_allowed, new_mask)) 1129 if (cpumask_equal(p->cpus_ptr, new_mask))
1130 goto out; 1130 goto out;
1131 1131
1132 if (!cpumask_intersects(new_mask, cpu_valid_mask)) { 1132 if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1286,10 +1286,10 @@ static int migrate_swap_stop(void *data)
1286 if (task_cpu(arg->src_task) != arg->src_cpu) 1286 if (task_cpu(arg->src_task) != arg->src_cpu)
1287 goto unlock; 1287 goto unlock;
1288 1288
1289 if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) 1289 if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
1290 goto unlock; 1290 goto unlock;
1291 1291
1292 if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) 1292 if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
1293 goto unlock; 1293 goto unlock;
1294 1294
1295 __migrate_swap_task(arg->src_task, arg->dst_cpu); 1295 __migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1331,10 +1331,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
1331 if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) 1331 if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
1332 goto out; 1332 goto out;
1333 1333
1334 if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) 1334 if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
1335 goto out; 1335 goto out;
1336 1336
1337 if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) 1337 if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
1338 goto out; 1338 goto out;
1339 1339
1340 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); 1340 trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1479,7 +1479,7 @@ void kick_process(struct task_struct *p)
1479EXPORT_SYMBOL_GPL(kick_process); 1479EXPORT_SYMBOL_GPL(kick_process);
1480 1480
1481/* 1481/*
1482 * ->cpus_allowed is protected by both rq->lock and p->pi_lock 1482 * ->cpus_ptr is protected by both rq->lock and p->pi_lock
1483 * 1483 *
1484 * A few notes on cpu_active vs cpu_online: 1484 * A few notes on cpu_active vs cpu_online:
1485 * 1485 *
@@ -1519,14 +1519,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
1519 for_each_cpu(dest_cpu, nodemask) { 1519 for_each_cpu(dest_cpu, nodemask) {
1520 if (!cpu_active(dest_cpu)) 1520 if (!cpu_active(dest_cpu))
1521 continue; 1521 continue;
1522 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 1522 if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
1523 return dest_cpu; 1523 return dest_cpu;
1524 } 1524 }
1525 } 1525 }
1526 1526
1527 for (;;) { 1527 for (;;) {
1528 /* Any allowed, online CPU? */ 1528 /* Any allowed, online CPU? */
1529 for_each_cpu(dest_cpu, &p->cpus_allowed) { 1529 for_each_cpu(dest_cpu, p->cpus_ptr) {
1530 if (!is_cpu_allowed(p, dest_cpu)) 1530 if (!is_cpu_allowed(p, dest_cpu))
1531 continue; 1531 continue;
1532 1532
@@ -1570,7 +1570,7 @@ out:
1570} 1570}
1571 1571
1572/* 1572/*
1573 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. 1573 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
1574 */ 1574 */
1575static inline 1575static inline
1576int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) 1576int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1580,11 +1580,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
1580 if (p->nr_cpus_allowed > 1) 1580 if (p->nr_cpus_allowed > 1)
1581 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); 1581 cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
1582 else 1582 else
1583 cpu = cpumask_any(&p->cpus_allowed); 1583 cpu = cpumask_any(p->cpus_ptr);
1584 1584
1585 /* 1585 /*
1586 * In order not to call set_task_cpu() on a blocking task we need 1586 * In order not to call set_task_cpu() on a blocking task we need
1587 * to rely on ttwu() to place the task on a valid ->cpus_allowed 1587 * to rely on ttwu() to place the task on a valid ->cpus_ptr
1588 * CPU. 1588 * CPU.
1589 * 1589 *
1590 * Since this is common to all placement strategies, this lives here. 1590 * Since this is common to all placement strategies, this lives here.
@@ -1991,6 +1991,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1991 unsigned long flags; 1991 unsigned long flags;
1992 int cpu, success = 0; 1992 int cpu, success = 0;
1993 1993
1994 if (p == current) {
1995 /*
1996 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
1997 * == smp_processor_id()'. Together this means we can special
1998 * case the whole 'p->on_rq && ttwu_remote()' case below
1999 * without taking any locks.
2000 *
2001 * In particular:
2002 * - we rely on Program-Order guarantees for all the ordering,
2003 * - we're serialized against set_special_state() by virtue of
2004 * it disabling IRQs (this allows not taking ->pi_lock).
2005 */
2006 if (!(p->state & state))
2007 return false;
2008
2009 success = 1;
2010 cpu = task_cpu(p);
2011 trace_sched_waking(p);
2012 p->state = TASK_RUNNING;
2013 trace_sched_wakeup(p);
2014 goto out;
2015 }
2016
1994 /* 2017 /*
1995 * If we are going to wake up a thread waiting for CONDITION we 2018 * If we are going to wake up a thread waiting for CONDITION we
1996 * need to ensure that CONDITION=1 done by the caller can not be 2019 * need to ensure that CONDITION=1 done by the caller can not be
@@ -2000,7 +2023,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2000 raw_spin_lock_irqsave(&p->pi_lock, flags); 2023 raw_spin_lock_irqsave(&p->pi_lock, flags);
2001 smp_mb__after_spinlock(); 2024 smp_mb__after_spinlock();
2002 if (!(p->state & state)) 2025 if (!(p->state & state))
2003 goto out; 2026 goto unlock;
2004 2027
2005 trace_sched_waking(p); 2028 trace_sched_waking(p);
2006 2029
@@ -2030,7 +2053,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2030 */ 2053 */
2031 smp_rmb(); 2054 smp_rmb();
2032 if (p->on_rq && ttwu_remote(p, wake_flags)) 2055 if (p->on_rq && ttwu_remote(p, wake_flags))
2033 goto stat; 2056 goto unlock;
2034 2057
2035#ifdef CONFIG_SMP 2058#ifdef CONFIG_SMP
2036 /* 2059 /*
@@ -2090,10 +2113,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2090#endif /* CONFIG_SMP */ 2113#endif /* CONFIG_SMP */
2091 2114
2092 ttwu_queue(p, cpu, wake_flags); 2115 ttwu_queue(p, cpu, wake_flags);
2093stat: 2116unlock:
2094 ttwu_stat(p, cpu, wake_flags);
2095out:
2096 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 2117 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2118out:
2119 if (success)
2120 ttwu_stat(p, cpu, wake_flags);
2097 2121
2098 return success; 2122 return success;
2099} 2123}
@@ -2395,7 +2419,7 @@ void wake_up_new_task(struct task_struct *p)
2395#ifdef CONFIG_SMP 2419#ifdef CONFIG_SMP
2396 /* 2420 /*
2397 * Fork balancing, do it here and not earlier because: 2421 * Fork balancing, do it here and not earlier because:
2398 * - cpus_allowed can change in the fork path 2422 * - cpus_ptr can change in the fork path
2399 * - any previously selected CPU might disappear through hotplug 2423 * - any previously selected CPU might disappear through hotplug
2400 * 2424 *
2401 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, 2425 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -3033,7 +3057,6 @@ void scheduler_tick(void)
3033 3057
3034 update_rq_clock(rq); 3058 update_rq_clock(rq);
3035 curr->sched_class->task_tick(rq, curr, 0); 3059 curr->sched_class->task_tick(rq, curr, 0);
3036 cpu_load_update_active(rq);
3037 calc_global_load_tick(rq); 3060 calc_global_load_tick(rq);
3038 psi_task_tick(rq); 3061 psi_task_tick(rq);
3039 3062
@@ -4267,7 +4290,7 @@ change:
4267 * the entire root_domain to become SCHED_DEADLINE. We 4290 * the entire root_domain to become SCHED_DEADLINE. We
4268 * will also fail if there's no bandwidth available. 4291 * will also fail if there's no bandwidth available.
4269 */ 4292 */
4270 if (!cpumask_subset(span, &p->cpus_allowed) || 4293 if (!cpumask_subset(span, p->cpus_ptr) ||
4271 rq->rd->dl_bw.bw == 0) { 4294 rq->rd->dl_bw.bw == 0) {
4272 task_rq_unlock(rq, p, &rf); 4295 task_rq_unlock(rq, p, &rf);
4273 return -EPERM; 4296 return -EPERM;
@@ -4866,7 +4889,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
4866 goto out_unlock; 4889 goto out_unlock;
4867 4890
4868 raw_spin_lock_irqsave(&p->pi_lock, flags); 4891 raw_spin_lock_irqsave(&p->pi_lock, flags);
4869 cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); 4892 cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
4870 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 4893 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4871 4894
4872out_unlock: 4895out_unlock:
@@ -5123,7 +5146,7 @@ long __sched io_schedule_timeout(long timeout)
5123} 5146}
5124EXPORT_SYMBOL(io_schedule_timeout); 5147EXPORT_SYMBOL(io_schedule_timeout);
5125 5148
5126void io_schedule(void) 5149void __sched io_schedule(void)
5127{ 5150{
5128 int token; 5151 int token;
5129 5152
@@ -5443,7 +5466,7 @@ int task_can_attach(struct task_struct *p,
5443 * allowed nodes is unnecessary. Thus, cpusets are not 5466 * allowed nodes is unnecessary. Thus, cpusets are not
5444 * applicable for such threads. This prevents checking for 5467 * applicable for such threads. This prevents checking for
5445 * success of set_cpus_allowed_ptr() on all attached tasks 5468 * success of set_cpus_allowed_ptr() on all attached tasks
5446 * before cpus_allowed may be changed. 5469 * before cpus_mask may be changed.
5447 */ 5470 */
5448 if (p->flags & PF_NO_SETAFFINITY) { 5471 if (p->flags & PF_NO_SETAFFINITY) {
5449 ret = -EINVAL; 5472 ret = -EINVAL;
@@ -5470,7 +5493,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
5470 if (curr_cpu == target_cpu) 5493 if (curr_cpu == target_cpu)
5471 return 0; 5494 return 0;
5472 5495
5473 if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) 5496 if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
5474 return -EINVAL; 5497 return -EINVAL;
5475 5498
5476 /* TODO: This is not properly updating schedstats */ 5499 /* TODO: This is not properly updating schedstats */
@@ -5608,7 +5631,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
5608 put_prev_task(rq, next); 5631 put_prev_task(rq, next);
5609 5632
5610 /* 5633 /*
5611 * Rules for changing task_struct::cpus_allowed are holding 5634 * Rules for changing task_struct::cpus_mask are holding
5612 * both pi_lock and rq->lock, such that holding either 5635 * both pi_lock and rq->lock, such that holding either
5613 * stabilizes the mask. 5636 * stabilizes the mask.
5614 * 5637 *
@@ -5902,8 +5925,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
5902 5925
5903void __init sched_init(void) 5926void __init sched_init(void)
5904{ 5927{
5905 int i, j;
5906 unsigned long alloc_size = 0, ptr; 5928 unsigned long alloc_size = 0, ptr;
5929 int i;
5907 5930
5908 wait_bit_init(); 5931 wait_bit_init();
5909 5932
@@ -6005,10 +6028,6 @@ void __init sched_init(void)
6005#ifdef CONFIG_RT_GROUP_SCHED 6028#ifdef CONFIG_RT_GROUP_SCHED
6006 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); 6029 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
6007#endif 6030#endif
6008
6009 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
6010 rq->cpu_load[j] = 0;
6011
6012#ifdef CONFIG_SMP 6031#ifdef CONFIG_SMP
6013 rq->sd = NULL; 6032 rq->sd = NULL;
6014 rq->rd = NULL; 6033 rq->rd = NULL;
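[Editor's note] Throughout kernel/sched/core.c the rename is mechanical: fast-path readers now dereference p->cpus_ptr, writers such as set_cpus_allowed_common() operate on p->cpus_mask, and sched_getaffinity() reads the stable mask under pi_lock. None of this changes the syscall ABI; a minimal userspace sanity check, for reference only:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	/* still reports the caller's allowed mask, exactly as before */
	if (sched_getaffinity(0, sizeof(set), &set))
		return 1;
	printf("allowed CPUs: %d\n", CPU_COUNT(&set));
	return 0;
}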
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index ec4e4a9aab5f..5cc4012572ec 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
120 const struct sched_dl_entity *dl_se = &p->dl; 120 const struct sched_dl_entity *dl_se = &p->dl;
121 121
122 if (later_mask && 122 if (later_mask &&
123 cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { 123 cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
124 return 1; 124 return 1;
125 } else { 125 } else {
126 int best_cpu = cpudl_maximum(cp); 126 int best_cpu = cpudl_maximum(cp);
127 127
128 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); 128 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
129 129
130 if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && 130 if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
131 dl_time_before(dl_se->deadline, cp->elements[0].dl)) { 131 dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
132 if (later_mask) 132 if (later_mask)
133 cpumask_set_cpu(best_cpu, later_mask); 133 cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 9c6480e6d62d..b7abca987d94 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
94 if (skip) 94 if (skip)
95 continue; 95 continue;
96 96
97 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 97 if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
98 continue; 98 continue;
99 99
100 if (lowest_mask) { 100 if (lowest_mask) {
101 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 101 cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
102 102
103 /* 103 /*
104 * We have to ensure that we have at least one bit 104 * We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 43901fa3f269..c1ef30861068 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
538 * If we cannot preempt any rq, fall back to pick any 538 * If we cannot preempt any rq, fall back to pick any
539 * online CPU: 539 * online CPU:
540 */ 540 */
541 cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); 541 cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
542 if (cpu >= nr_cpu_ids) { 542 if (cpu >= nr_cpu_ids) {
543 /* 543 /*
544 * Failed to find any suitable CPU. 544 * Failed to find any suitable CPU.
@@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq)
1824static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) 1824static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
1825{ 1825{
1826 if (!task_running(rq, p) && 1826 if (!task_running(rq, p) &&
1827 cpumask_test_cpu(cpu, &p->cpus_allowed)) 1827 cpumask_test_cpu(cpu, p->cpus_ptr))
1828 return 1; 1828 return 1;
1829 return 0; 1829 return 0;
1830} 1830}
@@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
1974 /* Retry if something changed. */ 1974 /* Retry if something changed. */
1975 if (double_lock_balance(rq, later_rq)) { 1975 if (double_lock_balance(rq, later_rq)) {
1976 if (unlikely(task_rq(task) != rq || 1976 if (unlikely(task_rq(task) != rq ||
1977 !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || 1977 !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
1978 task_running(rq, task) || 1978 task_running(rq, task) ||
1979 !dl_task(task) || 1979 !dl_task(task) ||
1980 !task_on_rq_queued(task))) { 1980 !task_on_rq_queued(task))) {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 14c6a8716ba1..f7e4579e746c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -233,49 +233,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
233 *tablep = NULL; 233 *tablep = NULL;
234} 234}
235 235
236static int min_load_idx = 0;
237static int max_load_idx = CPU_LOAD_IDX_MAX-1;
238
239static void 236static void
240set_table_entry(struct ctl_table *entry, 237set_table_entry(struct ctl_table *entry,
241 const char *procname, void *data, int maxlen, 238 const char *procname, void *data, int maxlen,
242 umode_t mode, proc_handler *proc_handler, 239 umode_t mode, proc_handler *proc_handler)
243 bool load_idx)
244{ 240{
245 entry->procname = procname; 241 entry->procname = procname;
246 entry->data = data; 242 entry->data = data;
247 entry->maxlen = maxlen; 243 entry->maxlen = maxlen;
248 entry->mode = mode; 244 entry->mode = mode;
249 entry->proc_handler = proc_handler; 245 entry->proc_handler = proc_handler;
250
251 if (load_idx) {
252 entry->extra1 = &min_load_idx;
253 entry->extra2 = &max_load_idx;
254 }
255} 246}
256 247
257static struct ctl_table * 248static struct ctl_table *
258sd_alloc_ctl_domain_table(struct sched_domain *sd) 249sd_alloc_ctl_domain_table(struct sched_domain *sd)
259{ 250{
260 struct ctl_table *table = sd_alloc_ctl_entry(14); 251 struct ctl_table *table = sd_alloc_ctl_entry(9);
261 252
262 if (table == NULL) 253 if (table == NULL)
263 return NULL; 254 return NULL;
264 255
265 set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); 256 set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
266 set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false); 257 set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
267 set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); 258 set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
268 set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); 259 set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
269 set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); 260 set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
270 set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); 261 set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax);
271 set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true ); 262 set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
272 set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false); 263 set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
273 set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false); 264 /* &table[8] is terminator */
274 set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false);
275 set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false);
276 set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
277 set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false);
278 /* &table[13] is terminator */
279 265
280 return table; 266 return table;
281} 267}
@@ -653,8 +639,6 @@ do { \
653 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) 639 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
654 640
655 P(nr_running); 641 P(nr_running);
656 SEQ_printf(m, " .%-30s: %lu\n", "load",
657 rq->load.weight);
658 P(nr_switches); 642 P(nr_switches);
659 P(nr_load_updates); 643 P(nr_load_updates);
660 P(nr_uninterruptible); 644 P(nr_uninterruptible);
@@ -662,11 +646,6 @@ do { \
662 SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); 646 SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
663 PN(clock); 647 PN(clock);
664 PN(clock_task); 648 PN(clock_task);
665 P(cpu_load[0]);
666 P(cpu_load[1]);
667 P(cpu_load[2]);
668 P(cpu_load[3]);
669 P(cpu_load[4]);
670#undef P 649#undef P
671#undef PN 650#undef PN
672 651
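[Editor's note] The cpu_load[0..4] values dropped from this debug dump are the per-runqueue decaying load averages whose implementation is deleted from kernel/sched/fair.c below. For reference, a small userspace toy of that average; the update rule load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load and its integer rounding are taken from the removed code, everything around them is illustrative:

#include <stdio.h>

#define CPU_LOAD_IDX_MAX 5

static void toy_cpu_load_update(unsigned long *load, unsigned long cur_load)
{
	unsigned long scale;
	int i;

	load[0] = cur_load;		/* index 0 tracks the instantaneous load */
	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
		unsigned long old_load = load[i], new_load = cur_load;

		/* round up while rising so the average can actually reach the load */
		if (new_load > old_load)
			new_load += scale - 1;
		load[i] = (old_load * (scale - 1) + new_load) >> i;
	}
}

int main(void)
{
	unsigned long load[CPU_LOAD_IDX_MAX] = { 0 };
	int i;

	for (i = 0; i < 4; i++)		/* four ticks of a constant load of 1024 */
		toy_cpu_load_update(load, 1024);

	for (i = 0; i < CPU_LOAD_IDX_MAX; i++)
		printf("cpu_load[%d] = %lu\n", i, load[i]);
	return 0;
}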
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f35930f5e528..3c11dcdedcbc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1467,8 +1467,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
1467} 1467}
1468 1468
1469static unsigned long weighted_cpuload(struct rq *rq); 1469static unsigned long weighted_cpuload(struct rq *rq);
1470static unsigned long source_load(int cpu, int type);
1471static unsigned long target_load(int cpu, int type);
1472 1470
1473/* Cached statistics for all CPUs within a node */ 1471/* Cached statistics for all CPUs within a node */
1474struct numa_stats { 1472struct numa_stats {
@@ -1621,7 +1619,7 @@ static void task_numa_compare(struct task_numa_env *env,
1621 * be incurred if the tasks were swapped. 1619 * be incurred if the tasks were swapped.
1622 */ 1620 */
1623 /* Skip this swap candidate if cannot move to the source cpu */ 1621 /* Skip this swap candidate if cannot move to the source cpu */
1624 if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) 1622 if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
1625 goto unlock; 1623 goto unlock;
1626 1624
1627 /* 1625 /*
@@ -1718,7 +1716,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
1718 1716
1719 for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { 1717 for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
1720 /* Skip this CPU if the source task cannot migrate */ 1718 /* Skip this CPU if the source task cannot migrate */
1721 if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) 1719 if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
1722 continue; 1720 continue;
1723 1721
1724 env->dst_cpu = cpu; 1722 env->dst_cpu = cpu;
@@ -2686,8 +2684,6 @@ static void
2686account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) 2684account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
2687{ 2685{
2688 update_load_add(&cfs_rq->load, se->load.weight); 2686 update_load_add(&cfs_rq->load, se->load.weight);
2689 if (!parent_entity(se))
2690 update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
2691#ifdef CONFIG_SMP 2687#ifdef CONFIG_SMP
2692 if (entity_is_task(se)) { 2688 if (entity_is_task(se)) {
2693 struct rq *rq = rq_of(cfs_rq); 2689 struct rq *rq = rq_of(cfs_rq);
@@ -2703,8 +2699,6 @@ static void
2703account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) 2699account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
2704{ 2700{
2705 update_load_sub(&cfs_rq->load, se->load.weight); 2701 update_load_sub(&cfs_rq->load, se->load.weight);
2706 if (!parent_entity(se))
2707 update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
2708#ifdef CONFIG_SMP 2702#ifdef CONFIG_SMP
2709 if (entity_is_task(se)) { 2703 if (entity_is_task(se)) {
2710 account_numa_dequeue(rq_of(cfs_rq), task_of(se)); 2704 account_numa_dequeue(rq_of(cfs_rq), task_of(se));
@@ -4100,7 +4094,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
4100 * least twice that of our own weight (i.e. dont track it 4094 * least twice that of our own weight (i.e. dont track it
4101 * when there are only lesser-weight tasks around): 4095 * when there are only lesser-weight tasks around):
4102 */ 4096 */
4103 if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) { 4097 if (schedstat_enabled() &&
4098 rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
4104 schedstat_set(se->statistics.slice_max, 4099 schedstat_set(se->statistics.slice_max,
4105 max((u64)schedstat_val(se->statistics.slice_max), 4100 max((u64)schedstat_val(se->statistics.slice_max),
4106 se->sum_exec_runtime - se->prev_sum_exec_runtime)); 4101 se->sum_exec_runtime - se->prev_sum_exec_runtime));
@@ -4734,6 +4729,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
4734 if (runtime_refresh_within(cfs_b, min_left)) 4729 if (runtime_refresh_within(cfs_b, min_left))
4735 return; 4730 return;
4736 4731
4732 /* don't push forwards an existing deferred unthrottle */
4733 if (cfs_b->slack_started)
4734 return;
4735 cfs_b->slack_started = true;
4736
4737 hrtimer_start(&cfs_b->slack_timer, 4737 hrtimer_start(&cfs_b->slack_timer,
4738 ns_to_ktime(cfs_bandwidth_slack_period), 4738 ns_to_ktime(cfs_bandwidth_slack_period),
4739 HRTIMER_MODE_REL); 4739 HRTIMER_MODE_REL);
@@ -4787,6 +4787,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
4787 4787
4788 /* confirm we're still not at a refresh boundary */ 4788 /* confirm we're still not at a refresh boundary */
4789 raw_spin_lock_irqsave(&cfs_b->lock, flags); 4789 raw_spin_lock_irqsave(&cfs_b->lock, flags);
4790 cfs_b->slack_started = false;
4790 if (cfs_b->distribute_running) { 4791 if (cfs_b->distribute_running) {
4791 raw_spin_unlock_irqrestore(&cfs_b->lock, flags); 4792 raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
4792 return; 4793 return;
@@ -4950,6 +4951,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
4950 hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4951 hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4951 cfs_b->slack_timer.function = sched_cfs_slack_timer; 4952 cfs_b->slack_timer.function = sched_cfs_slack_timer;
4952 cfs_b->distribute_running = 0; 4953 cfs_b->distribute_running = 0;
4954 cfs_b->slack_started = false;
4953} 4955}
4954 4956
4955static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) 4957static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -5325,71 +5327,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
5325DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); 5327DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
5326 5328
5327#ifdef CONFIG_NO_HZ_COMMON 5329#ifdef CONFIG_NO_HZ_COMMON
5328/*
5329 * per rq 'load' arrray crap; XXX kill this.
5330 */
5331
5332/*
5333 * The exact cpuload calculated at every tick would be:
5334 *
5335 * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
5336 *
5337 * If a CPU misses updates for n ticks (as it was idle) and update gets
5338 * called on the n+1-th tick when CPU may be busy, then we have:
5339 *
5340 * load_n = (1 - 1/2^i)^n * load_0
5341 * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
5342 *
5343 * decay_load_missed() below does efficient calculation of
5344 *
5345 * load' = (1 - 1/2^i)^n * load
5346 *
5347 * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors.
5348 * This allows us to precompute the above in said factors, thereby allowing the
5349 * reduction of an arbitrary n in O(log_2 n) steps. (See also
5350 * fixed_power_int())
5351 *
5352 * The calculation is approximated on a 128 point scale.
5353 */
5354#define DEGRADE_SHIFT 7
5355
5356static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
5357static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
5358 { 0, 0, 0, 0, 0, 0, 0, 0 },
5359 { 64, 32, 8, 0, 0, 0, 0, 0 },
5360 { 96, 72, 40, 12, 1, 0, 0, 0 },
5361 { 112, 98, 75, 43, 15, 1, 0, 0 },
5362 { 120, 112, 98, 76, 45, 16, 2, 0 }
5363};
5364
5365/*
5366 * Update cpu_load for any missed ticks, due to tickless idle. The backlog
5367 * would be when CPU is idle and so we just decay the old load without
5368 * adding any new load.
5369 */
5370static unsigned long
5371decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
5372{
5373 int j = 0;
5374
5375 if (!missed_updates)
5376 return load;
5377
5378 if (missed_updates >= degrade_zero_ticks[idx])
5379 return 0;
5380
5381 if (idx == 1)
5382 return load >> missed_updates;
5383
5384 while (missed_updates) {
5385 if (missed_updates % 2)
5386 load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
5387
5388 missed_updates >>= 1;
5389 j++;
5390 }
5391 return load;
5392}
5393 5330
5394static struct { 5331static struct {
5395 cpumask_var_t idle_cpus_mask; 5332 cpumask_var_t idle_cpus_mask;
@@ -5401,234 +5338,11 @@ static struct {
5401 5338
5402#endif /* CONFIG_NO_HZ_COMMON */ 5339#endif /* CONFIG_NO_HZ_COMMON */
5403 5340
5404/**
5405 * __cpu_load_update - update the rq->cpu_load[] statistics
5406 * @this_rq: The rq to update statistics for
5407 * @this_load: The current load
5408 * @pending_updates: The number of missed updates
5409 *
5410 * Update rq->cpu_load[] statistics. This function is usually called every
5411 * scheduler tick (TICK_NSEC).
5412 *
5413 * This function computes a decaying average:
5414 *
5415 * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
5416 *
5417 * Because of NOHZ it might not get called on every tick which gives need for
5418 * the @pending_updates argument.
5419 *
5420 * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
5421 * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
5422 * = A * (A * load[i]_n-2 + B) + B
5423 * = A * (A * (A * load[i]_n-3 + B) + B) + B
5424 * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
5425 * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
5426 * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
5427 * = (1 - 1/2^i)^n * (load[i]_0 - load) + load
5428 *
5429 * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
5430 * any change in load would have resulted in the tick being turned back on.
5431 *
5432 * For regular NOHZ, this reduces to:
5433 *
5434 * load[i]_n = (1 - 1/2^i)^n * load[i]_0
5435 *
5436 * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
5437 * term.
5438 */
5439static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
5440 unsigned long pending_updates)
5441{
5442 unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
5443 int i, scale;
5444
5445 this_rq->nr_load_updates++;
5446
5447 /* Update our load: */
5448 this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
5449 for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
5450 unsigned long old_load, new_load;
5451
5452 /* scale is effectively 1 << i now, and >> i divides by scale */
5453
5454 old_load = this_rq->cpu_load[i];
5455#ifdef CONFIG_NO_HZ_COMMON
5456 old_load = decay_load_missed(old_load, pending_updates - 1, i);
5457 if (tickless_load) {
5458 old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
5459 /*
5460 * old_load can never be a negative value because a
5461 * decayed tickless_load cannot be greater than the
5462 * original tickless_load.
5463 */
5464 old_load += tickless_load;
5465 }
5466#endif
5467 new_load = this_load;
5468 /*
5469 * Round up the averaging division if load is increasing. This
5470 * prevents us from getting stuck on 9 if the load is 10, for
5471 * example.
5472 */
5473 if (new_load > old_load)
5474 new_load += scale - 1;
5475
5476 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
5477 }
5478}
5479
5480/* Used instead of source_load when we know the type == 0 */
5481static unsigned long weighted_cpuload(struct rq *rq) 5341static unsigned long weighted_cpuload(struct rq *rq)
5482{ 5342{
5483 return cfs_rq_runnable_load_avg(&rq->cfs); 5343 return cfs_rq_runnable_load_avg(&rq->cfs);
5484} 5344}
5485 5345
5486#ifdef CONFIG_NO_HZ_COMMON
5487/*
5488 * There is no sane way to deal with nohz on smp when using jiffies because the
5489 * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading
5490 * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
5491 *
5492 * Therefore we need to avoid the delta approach from the regular tick when
5493 * possible since that would seriously skew the load calculation. This is why we
5494 * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on
5495 * jiffies deltas for updates happening while in nohz mode (idle ticks, idle
5496 * loop exit, nohz_idle_balance, nohz full exit...)
5497 *
5498 * This means we might still be one tick off for nohz periods.
5499 */
5500
5501static void cpu_load_update_nohz(struct rq *this_rq,
5502 unsigned long curr_jiffies,
5503 unsigned long load)
5504{
5505 unsigned long pending_updates;
5506
5507 pending_updates = curr_jiffies - this_rq->last_load_update_tick;
5508 if (pending_updates) {
5509 this_rq->last_load_update_tick = curr_jiffies;
5510 /*
5511 * In the regular NOHZ case, we were idle, this means load 0.
5512 * In the NOHZ_FULL case, we were non-idle, we should consider
5513 * its weighted load.
5514 */
5515 cpu_load_update(this_rq, load, pending_updates);
5516 }
5517}
5518
5519/*
5520 * Called from nohz_idle_balance() to update the load ratings before doing the
5521 * idle balance.
5522 */
5523static void cpu_load_update_idle(struct rq *this_rq)
5524{
5525 /*
5526 * bail if there's load or we're actually up-to-date.
5527 */
5528 if (weighted_cpuload(this_rq))
5529 return;
5530
5531 cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
5532}
5533
5534/*
5535 * Record CPU load on nohz entry so we know the tickless load to account
5536 * on nohz exit. cpu_load[0] happens then to be updated more frequently
5537 * than other cpu_load[idx] but it should be fine as cpu_load readers
5538 * shouldn't rely into synchronized cpu_load[*] updates.
5539 */
5540void cpu_load_update_nohz_start(void)
5541{
5542 struct rq *this_rq = this_rq();
5543
5544 /*
5545 * This is all lockless but should be fine. If weighted_cpuload changes
5546 * concurrently we'll exit nohz. And cpu_load write can race with
5547 * cpu_load_update_idle() but both updater would be writing the same.
5548 */
5549 this_rq->cpu_load[0] = weighted_cpuload(this_rq);
5550}
5551
5552/*
5553 * Account the tickless load in the end of a nohz frame.
5554 */
5555void cpu_load_update_nohz_stop(void)
5556{
5557 unsigned long curr_jiffies = READ_ONCE(jiffies);
5558 struct rq *this_rq = this_rq();
5559 unsigned long load;
5560 struct rq_flags rf;
5561
5562 if (curr_jiffies == this_rq->last_load_update_tick)
5563 return;
5564
5565 load = weighted_cpuload(this_rq);
5566 rq_lock(this_rq, &rf);
5567 update_rq_clock(this_rq);
5568 cpu_load_update_nohz(this_rq, curr_jiffies, load);
5569 rq_unlock(this_rq, &rf);
5570}
5571#else /* !CONFIG_NO_HZ_COMMON */
5572static inline void cpu_load_update_nohz(struct rq *this_rq,
5573 unsigned long curr_jiffies,
5574 unsigned long load) { }
5575#endif /* CONFIG_NO_HZ_COMMON */
5576
5577static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
5578{
5579#ifdef CONFIG_NO_HZ_COMMON
5580 /* See the mess around cpu_load_update_nohz(). */
5581 this_rq->last_load_update_tick = READ_ONCE(jiffies);
5582#endif
5583 cpu_load_update(this_rq, load, 1);
5584}
5585
5586/*
5587 * Called from scheduler_tick()
5588 */
5589void cpu_load_update_active(struct rq *this_rq)
5590{
5591 unsigned long load = weighted_cpuload(this_rq);
5592
5593 if (tick_nohz_tick_stopped())
5594 cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
5595 else
5596 cpu_load_update_periodic(this_rq, load);
5597}
5598
5599/*
5600 * Return a low guess at the load of a migration-source CPU weighted
5601 * according to the scheduling class and "nice" value.
5602 *
5603 * We want to under-estimate the load of migration sources, to
5604 * balance conservatively.
5605 */
5606static unsigned long source_load(int cpu, int type)
5607{
5608 struct rq *rq = cpu_rq(cpu);
5609 unsigned long total = weighted_cpuload(rq);
5610
5611 if (type == 0 || !sched_feat(LB_BIAS))
5612 return total;
5613
5614 return min(rq->cpu_load[type-1], total);
5615}
5616
5617/*
5618 * Return a high guess at the load of a migration-target CPU weighted
5619 * according to the scheduling class and "nice" value.
5620 */
5621static unsigned long target_load(int cpu, int type)
5622{
5623 struct rq *rq = cpu_rq(cpu);
5624 unsigned long total = weighted_cpuload(rq);
5625
5626 if (type == 0 || !sched_feat(LB_BIAS))
5627 return total;
5628
5629 return max(rq->cpu_load[type-1], total);
5630}
5631
5632static unsigned long capacity_of(int cpu) 5346static unsigned long capacity_of(int cpu)
5633{ 5347{
5634 return cpu_rq(cpu)->cpu_capacity; 5348 return cpu_rq(cpu)->cpu_capacity;
@@ -5736,7 +5450,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
5736 s64 this_eff_load, prev_eff_load; 5450 s64 this_eff_load, prev_eff_load;
5737 unsigned long task_load; 5451 unsigned long task_load;
5738 5452
5739 this_eff_load = target_load(this_cpu, sd->wake_idx); 5453 this_eff_load = weighted_cpuload(cpu_rq(this_cpu));
5740 5454
5741 if (sync) { 5455 if (sync) {
5742 unsigned long current_load = task_h_load(current); 5456 unsigned long current_load = task_h_load(current);
@@ -5754,7 +5468,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
5754 this_eff_load *= 100; 5468 this_eff_load *= 100;
5755 this_eff_load *= capacity_of(prev_cpu); 5469 this_eff_load *= capacity_of(prev_cpu);
5756 5470
5757 prev_eff_load = source_load(prev_cpu, sd->wake_idx); 5471 prev_eff_load = weighted_cpuload(cpu_rq(prev_cpu));
5758 prev_eff_load -= task_load; 5472 prev_eff_load -= task_load;
5759 if (sched_feat(WA_BIAS)) 5473 if (sched_feat(WA_BIAS))
5760 prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; 5474 prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5815,14 +5529,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
5815 unsigned long this_runnable_load = ULONG_MAX; 5529 unsigned long this_runnable_load = ULONG_MAX;
5816 unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX; 5530 unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
5817 unsigned long most_spare = 0, this_spare = 0; 5531 unsigned long most_spare = 0, this_spare = 0;
5818 int load_idx = sd->forkexec_idx;
5819 int imbalance_scale = 100 + (sd->imbalance_pct-100)/2; 5532 int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
5820 unsigned long imbalance = scale_load_down(NICE_0_LOAD) * 5533 unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
5821 (sd->imbalance_pct-100) / 100; 5534 (sd->imbalance_pct-100) / 100;
5822 5535
5823 if (sd_flag & SD_BALANCE_WAKE)
5824 load_idx = sd->wake_idx;
5825
5826 do { 5536 do {
5827 unsigned long load, avg_load, runnable_load; 5537 unsigned long load, avg_load, runnable_load;
5828 unsigned long spare_cap, max_spare_cap; 5538 unsigned long spare_cap, max_spare_cap;
@@ -5831,7 +5541,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
5831 5541
5832 /* Skip over this group if it has no CPUs allowed */ 5542 /* Skip over this group if it has no CPUs allowed */
5833 if (!cpumask_intersects(sched_group_span(group), 5543 if (!cpumask_intersects(sched_group_span(group),
5834 &p->cpus_allowed)) 5544 p->cpus_ptr))
5835 continue; 5545 continue;
5836 5546
5837 local_group = cpumask_test_cpu(this_cpu, 5547 local_group = cpumask_test_cpu(this_cpu,
@@ -5846,12 +5556,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
5846 max_spare_cap = 0; 5556 max_spare_cap = 0;
5847 5557
5848 for_each_cpu(i, sched_group_span(group)) { 5558 for_each_cpu(i, sched_group_span(group)) {
5849 /* Bias balancing toward CPUs of our domain */ 5559 load = weighted_cpuload(cpu_rq(i));
5850 if (local_group)
5851 load = source_load(i, load_idx);
5852 else
5853 load = target_load(i, load_idx);
5854
5855 runnable_load += load; 5560 runnable_load += load;
5856 5561
5857 avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); 5562 avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5963,7 +5668,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
5963 return cpumask_first(sched_group_span(group)); 5668 return cpumask_first(sched_group_span(group));
5964 5669
5965 /* Traverse only the allowed CPUs */ 5670 /* Traverse only the allowed CPUs */
5966 for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { 5671 for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
5967 if (available_idle_cpu(i)) { 5672 if (available_idle_cpu(i)) {
5968 struct rq *rq = cpu_rq(i); 5673 struct rq *rq = cpu_rq(i);
5969 struct cpuidle_state *idle = idle_get_state(rq); 5674 struct cpuidle_state *idle = idle_get_state(rq);
@@ -6003,7 +5708,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
6003{ 5708{
6004 int new_cpu = cpu; 5709 int new_cpu = cpu;
6005 5710
6006 if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) 5711 if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
6007 return prev_cpu; 5712 return prev_cpu;
6008 5713
6009 /* 5714 /*
@@ -6120,7 +5825,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
6120 if (!test_idle_cores(target, false)) 5825 if (!test_idle_cores(target, false))
6121 return -1; 5826 return -1;
6122 5827
6123 cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); 5828 cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
6124 5829
6125 for_each_cpu_wrap(core, cpus, target) { 5830 for_each_cpu_wrap(core, cpus, target) {
6126 bool idle = true; 5831 bool idle = true;
@@ -6154,7 +5859,7 @@ static int select_idle_smt(struct task_struct *p, int target)
6154 return -1; 5859 return -1;
6155 5860
6156 for_each_cpu(cpu, cpu_smt_mask(target)) { 5861 for_each_cpu(cpu, cpu_smt_mask(target)) {
6157 if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) 5862 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
6158 continue; 5863 continue;
6159 if (available_idle_cpu(cpu)) 5864 if (available_idle_cpu(cpu))
6160 return cpu; 5865 return cpu;
@@ -6217,7 +5922,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
6217 for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { 5922 for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
6218 if (!--nr) 5923 if (!--nr)
6219 return -1; 5924 return -1;
6220 if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) 5925 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
6221 continue; 5926 continue;
6222 if (available_idle_cpu(cpu)) 5927 if (available_idle_cpu(cpu))
6223 break; 5928 break;
@@ -6254,7 +5959,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
6254 recent_used_cpu != target && 5959 recent_used_cpu != target &&
6255 cpus_share_cache(recent_used_cpu, target) && 5960 cpus_share_cache(recent_used_cpu, target) &&
6256 available_idle_cpu(recent_used_cpu) && 5961 available_idle_cpu(recent_used_cpu) &&
6257 cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { 5962 cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
6258 /* 5963 /*
6259 * Replace recent_used_cpu with prev as it is a potential 5964 * Replace recent_used_cpu with prev as it is a potential
6260 * candidate for the next wake: 5965 * candidate for the next wake:
@@ -6600,7 +6305,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
6600 int max_spare_cap_cpu = -1; 6305 int max_spare_cap_cpu = -1;
6601 6306
6602 for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { 6307 for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
6603 if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) 6308 if (!cpumask_test_cpu(cpu, p->cpus_ptr))
6604 continue; 6309 continue;
6605 6310
6606 /* Skip CPUs that will be overutilized. */ 6311 /* Skip CPUs that will be overutilized. */
@@ -6689,7 +6394,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
6689 } 6394 }
6690 6395
6691 want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) && 6396 want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
6692 cpumask_test_cpu(cpu, &p->cpus_allowed); 6397 cpumask_test_cpu(cpu, p->cpus_ptr);
6693 } 6398 }
6694 6399
6695 rcu_read_lock(); 6400 rcu_read_lock();
@@ -7445,14 +7150,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
7445 /* 7150 /*
7446 * We do not migrate tasks that are: 7151 * We do not migrate tasks that are:
7447 * 1) throttled_lb_pair, or 7152 * 1) throttled_lb_pair, or
7448 * 2) cannot be migrated to this CPU due to cpus_allowed, or 7153 * 2) cannot be migrated to this CPU due to cpus_ptr, or
7449 * 3) running (obviously), or 7154 * 3) running (obviously), or
7450 * 4) are cache-hot on their current CPU. 7155 * 4) are cache-hot on their current CPU.
7451 */ 7156 */
7452 if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) 7157 if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
7453 return 0; 7158 return 0;
7454 7159
7455 if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { 7160 if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
7456 int cpu; 7161 int cpu;
7457 7162
7458 schedstat_inc(p->se.statistics.nr_failed_migrations_affine); 7163 schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
@@ -7472,7 +7177,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
7472 7177
7473 /* Prevent to re-select dst_cpu via env's CPUs: */ 7178 /* Prevent to re-select dst_cpu via env's CPUs: */
7474 for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { 7179 for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
7475 if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { 7180 if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
7476 env->flags |= LBF_DST_PINNED; 7181 env->flags |= LBF_DST_PINNED;
7477 env->new_dst_cpu = cpu; 7182 env->new_dst_cpu = cpu;
7478 break; 7183 break;
@@ -7695,6 +7400,7 @@ static void attach_tasks(struct lb_env *env)
7695 rq_unlock(env->dst_rq, &rf); 7400 rq_unlock(env->dst_rq, &rf);
7696} 7401}
7697 7402
7403#ifdef CONFIG_NO_HZ_COMMON
7698static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) 7404static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
7699{ 7405{
7700 if (cfs_rq->avg.load_avg) 7406 if (cfs_rq->avg.load_avg)
@@ -7722,6 +7428,19 @@ static inline bool others_have_blocked(struct rq *rq)
7722 return false; 7428 return false;
7723} 7429}
7724 7430
7431static inline void update_blocked_load_status(struct rq *rq, bool has_blocked)
7432{
7433 rq->last_blocked_load_update_tick = jiffies;
7434
7435 if (!has_blocked)
7436 rq->has_blocked_load = 0;
7437}
7438#else
7439static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) { return false; }
7440static inline bool others_have_blocked(struct rq *rq) { return false; }
7441static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {}
7442#endif
7443
7725#ifdef CONFIG_FAIR_GROUP_SCHED 7444#ifdef CONFIG_FAIR_GROUP_SCHED
7726 7445
7727static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) 7446static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -7787,11 +7506,7 @@ static void update_blocked_averages(int cpu)
7787 if (others_have_blocked(rq)) 7506 if (others_have_blocked(rq))
7788 done = false; 7507 done = false;
7789 7508
7790#ifdef CONFIG_NO_HZ_COMMON 7509 update_blocked_load_status(rq, !done);
7791 rq->last_blocked_load_update_tick = jiffies;
7792 if (done)
7793 rq->has_blocked_load = 0;
7794#endif
7795 rq_unlock_irqrestore(rq, &rf); 7510 rq_unlock_irqrestore(rq, &rf);
7796} 7511}
7797 7512
@@ -7857,11 +7572,7 @@ static inline void update_blocked_averages(int cpu)
7857 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class); 7572 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
7858 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class); 7573 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
7859 update_irq_load_avg(rq, 0); 7574 update_irq_load_avg(rq, 0);
7860#ifdef CONFIG_NO_HZ_COMMON 7575 update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
7861 rq->last_blocked_load_update_tick = jiffies;
7862 if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq))
7863 rq->has_blocked_load = 0;
7864#endif
7865 rq_unlock_irqrestore(rq, &rf); 7576 rq_unlock_irqrestore(rq, &rf);
7866} 7577}
7867 7578
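Both flavours of update_blocked_averages() above now end with the same one-liner instead of an open-coded #ifdef CONFIG_NO_HZ_COMMON block, thanks to the update_blocked_load_status() helper and its no-op stub introduced a few hunks earlier. A throwaway model of that helper-plus-stub idiom, with invented stand-in fields and a faked jiffies value:

#include <stdbool.h>
#include <stdio.h>

/* Invented stand-ins for the two rq fields the helper touches. */
struct rq_model {
	unsigned long last_blocked_load_update_tick;
	int has_blocked_load;
};

#ifdef CONFIG_NO_HZ_COMMON
static inline void update_blocked_load_status(struct rq_model *rq, bool has_blocked)
{
	rq->last_blocked_load_update_tick = 12345;	/* fake jiffies */
	if (!has_blocked)
		rq->has_blocked_load = 0;
}
#else
static inline void update_blocked_load_status(struct rq_model *rq, bool has_blocked)
{
	(void)rq;
	(void)has_blocked;
}
#endif

int main(void)
{
	struct rq_model rq = { .has_blocked_load = 1 };

	/* Call site stays #ifdef-free either way. */
	update_blocked_load_status(&rq, false);
	printf("has_blocked_load = %d\n", rq.has_blocked_load);
	return 0;
}

Building with and without -DCONFIG_NO_HZ_COMMON shows the call site unchanged while the bookkeeping compiles away, which is all the kernel hunk is doing.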
@@ -7879,7 +7590,6 @@ static unsigned long task_h_load(struct task_struct *p)
7879struct sg_lb_stats { 7590struct sg_lb_stats {
7880 unsigned long avg_load; /*Avg load across the CPUs of the group */ 7591 unsigned long avg_load; /*Avg load across the CPUs of the group */
7881 unsigned long group_load; /* Total load over the CPUs of the group */ 7592 unsigned long group_load; /* Total load over the CPUs of the group */
7882 unsigned long sum_weighted_load; /* Weighted load of group's tasks */
7883 unsigned long load_per_task; 7593 unsigned long load_per_task;
7884 unsigned long group_capacity; 7594 unsigned long group_capacity;
7885 unsigned long group_util; /* Total utilization of the group */ 7595 unsigned long group_util; /* Total utilization of the group */
@@ -7933,34 +7643,6 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
7933 }; 7643 };
7934} 7644}
7935 7645
7936/**
7937 * get_sd_load_idx - Obtain the load index for a given sched domain.
7938 * @sd: The sched_domain whose load_idx is to be obtained.
7939 * @idle: The idle status of the CPU for whose sd load_idx is obtained.
7940 *
7941 * Return: The load index.
7942 */
7943static inline int get_sd_load_idx(struct sched_domain *sd,
7944 enum cpu_idle_type idle)
7945{
7946 int load_idx;
7947
7948 switch (idle) {
7949 case CPU_NOT_IDLE:
7950 load_idx = sd->busy_idx;
7951 break;
7952
7953 case CPU_NEWLY_IDLE:
7954 load_idx = sd->newidle_idx;
7955 break;
7956 default:
7957 load_idx = sd->idle_idx;
7958 break;
7959 }
7960
7961 return load_idx;
7962}
7963
7964static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu) 7646static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
7965{ 7647{
7966 struct rq *rq = cpu_rq(cpu); 7648 struct rq *rq = cpu_rq(cpu);
@@ -8099,7 +7781,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
8099 7781
8100/* 7782/*
8101 * Group imbalance indicates (and tries to solve) the problem where balancing 7783 * Group imbalance indicates (and tries to solve) the problem where balancing
8102 * groups is inadequate due to ->cpus_allowed constraints. 7784 * groups is inadequate due to ->cpus_ptr constraints.
8103 * 7785 *
8104 * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a 7786 * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
8105 * cpumask covering 1 CPU of the first group and 3 CPUs of the second group. 7787 * cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
@@ -8249,9 +7931,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
8249 struct sg_lb_stats *sgs, 7931 struct sg_lb_stats *sgs,
8250 int *sg_status) 7932 int *sg_status)
8251{ 7933{
8252 int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
8253 int load_idx = get_sd_load_idx(env->sd, env->idle);
8254 unsigned long load;
8255 int i, nr_running; 7934 int i, nr_running;
8256 7935
8257 memset(sgs, 0, sizeof(*sgs)); 7936 memset(sgs, 0, sizeof(*sgs));
@@ -8262,13 +7941,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
8262 if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false)) 7941 if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
8263 env->flags |= LBF_NOHZ_AGAIN; 7942 env->flags |= LBF_NOHZ_AGAIN;
8264 7943
8265 /* Bias balancing toward CPUs of our domain: */ 7944 sgs->group_load += weighted_cpuload(rq);
8266 if (local_group)
8267 load = target_load(i, load_idx);
8268 else
8269 load = source_load(i, load_idx);
8270
8271 sgs->group_load += load;
8272 sgs->group_util += cpu_util(i); 7945 sgs->group_util += cpu_util(i);
8273 sgs->sum_nr_running += rq->cfs.h_nr_running; 7946 sgs->sum_nr_running += rq->cfs.h_nr_running;
8274 7947
@@ -8283,7 +7956,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
8283 sgs->nr_numa_running += rq->nr_numa_running; 7956 sgs->nr_numa_running += rq->nr_numa_running;
8284 sgs->nr_preferred_running += rq->nr_preferred_running; 7957 sgs->nr_preferred_running += rq->nr_preferred_running;
8285#endif 7958#endif
8286 sgs->sum_weighted_load += weighted_cpuload(rq);
8287 /* 7959 /*
8288 * No need to call idle_cpu() if nr_running is not 0 7960 * No need to call idle_cpu() if nr_running is not 0
8289 */ 7961 */
@@ -8302,7 +7974,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
8302 sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; 7974 sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
8303 7975
8304 if (sgs->sum_nr_running) 7976 if (sgs->sum_nr_running)
8305 sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; 7977 sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
8306 7978
8307 sgs->group_weight = group->group_weight; 7979 sgs->group_weight = group->group_weight;
8308 7980
@@ -8768,7 +8440,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
8768 /* 8440 /*
8769 * If the busiest group is imbalanced the below checks don't 8441 * If the busiest group is imbalanced the below checks don't
8770 * work because they assume all things are equal, which typically 8442 * work because they assume all things are equal, which typically
8771 * isn't true due to cpus_allowed constraints and the like. 8443 * isn't true due to cpus_ptr constraints and the like.
8772 */ 8444 */
8773 if (busiest->group_type == group_imbalanced) 8445 if (busiest->group_type == group_imbalanced)
8774 goto force_balance; 8446 goto force_balance;
@@ -9210,7 +8882,7 @@ more_balance:
9210 * if the curr task on busiest CPU can't be 8882 * if the curr task on busiest CPU can't be
9211 * moved to this_cpu: 8883 * moved to this_cpu:
9212 */ 8884 */
9213 if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { 8885 if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
9214 raw_spin_unlock_irqrestore(&busiest->lock, 8886 raw_spin_unlock_irqrestore(&busiest->lock,
9215 flags); 8887 flags);
9216 env.flags |= LBF_ALL_PINNED; 8888 env.flags |= LBF_ALL_PINNED;
@@ -9879,7 +9551,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
9879 9551
9880 rq_lock_irqsave(rq, &rf); 9552 rq_lock_irqsave(rq, &rf);
9881 update_rq_clock(rq); 9553 update_rq_clock(rq);
9882 cpu_load_update_idle(rq);
9883 rq_unlock_irqrestore(rq, &rf); 9554 rq_unlock_irqrestore(rq, &rf);
9884 9555
9885 if (flags & NOHZ_BALANCE_KICK) 9556 if (flags & NOHZ_BALANCE_KICK)
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 858589b83377..2410db5e9a35 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true)
39 39
40SCHED_FEAT(HRTICK, false) 40SCHED_FEAT(HRTICK, false)
41SCHED_FEAT(DOUBLE_TICK, false) 41SCHED_FEAT(DOUBLE_TICK, false)
42SCHED_FEAT(LB_BIAS, false)
43 42
44/* 43/*
45 * Decrement CPU capacity based on time not spent running tasks 44 * Decrement CPU capacity based on time not spent running tasks
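Deleting the single SCHED_FEAT(LB_BIAS, false) line is enough to remove the feature everywhere because features.h is consumed as an x-macro: the scheduler re-includes it with different SCHED_FEAT() definitions to build the feature enum, the default bitmask and the printable name table. A minimal userspace sketch of that pattern; the macro and feature names here are illustrative, not the kernel's exact expansions:

#include <stdio.h>

/* Illustrative feature list; the kernel's lives in features.h. */
#define SCHED_FEATURES_MODEL(FEAT)		\
	FEAT(GENTLE_FAIR_SLEEPERS, 1)		\
	FEAT(HRTICK, 0)				\
	FEAT(WA_BIAS, 1)

/* Expansion 1: one enum constant per feature. */
#define MAKE_ENUM(name, enabled)	FEAT_##name,
enum { SCHED_FEATURES_MODEL(MAKE_ENUM) NR_FEATURES };
#undef MAKE_ENUM

/* Expansion 2: the default-enabled bitmask. */
#define MAKE_MASK(name, enabled)	((enabled) << FEAT_##name) |
static const unsigned int default_features = SCHED_FEATURES_MODEL(MAKE_MASK) 0;
#undef MAKE_MASK

/* Expansion 3: names, e.g. for a debugfs-style listing. */
#define MAKE_NAME(name, enabled)	#name,
static const char *feature_names[] = { SCHED_FEATURES_MODEL(MAKE_NAME) };
#undef MAKE_NAME

int main(void)
{
	int i;

	for (i = 0; i < NR_FEATURES; i++)
		printf("%-22s %u\n", feature_names[i],
		       (default_features >> i) & 1);
	return 0;
}

Dropping one FEAT() line therefore removes the bit, its default and its name in a single edit, so nothing else needs updating for LB_BIAS beyond the sched_feat(LB_BIAS) tests already deleted from fair.c.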
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 1e6b909dca36..63ad7c90822c 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1614static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 1614static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1615{ 1615{
1616 if (!task_running(rq, p) && 1616 if (!task_running(rq, p) &&
1617 cpumask_test_cpu(cpu, &p->cpus_allowed)) 1617 cpumask_test_cpu(cpu, p->cpus_ptr))
1618 return 1; 1618 return 1;
1619 1619
1620 return 0; 1620 return 0;
@@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1751 * Also make sure that it wasn't scheduled on its rq. 1751 * Also make sure that it wasn't scheduled on its rq.
1752 */ 1752 */
1753 if (unlikely(task_rq(task) != rq || 1753 if (unlikely(task_rq(task) != rq ||
1754 !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || 1754 !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
1755 task_running(rq, task) || 1755 task_running(rq, task) ||
1756 !rt_task(task) || 1756 !rt_task(task) ||
1757 !task_on_rq_queued(task))) { 1757 !task_on_rq_queued(task))) {
diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h
index a26473674fb7..c529706bed11 100644
--- a/kernel/sched/sched-pelt.h
+++ b/kernel/sched/sched-pelt.h
@@ -1,7 +1,7 @@
1/* SPDX-License-Identifier: GPL-2.0 */ 1/* SPDX-License-Identifier: GPL-2.0 */
2/* Generated by Documentation/scheduler/sched-pelt; do not modify. */ 2/* Generated by Documentation/scheduler/sched-pelt; do not modify. */
3 3
4static const u32 runnable_avg_yN_inv[] = { 4static const u32 runnable_avg_yN_inv[] __maybe_unused = {
5 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, 5 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
6 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85, 6 0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
7 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581, 7 0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
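The regenerated table differs only in the __maybe_unused annotation. Presumably this is so that a translation unit which includes the header without referencing runnable_avg_yN_inv (easier to hit once the load-index users above are gone) does not trip unused-const-variable warnings; in the kernel, __maybe_unused boils down to the compiler's unused attribute. A standalone illustration, with a local stand-in for the macro:

/* unused_demo.c: gcc -Wall -Wextra -Wunused-const-variable -c unused_demo.c */
#define __maybe_unused __attribute__((__unused__))

static const unsigned int table_plain[] = { 1, 2, 3 };	/* "defined but not used" warning */
static const unsigned int table_annotated[] __maybe_unused = { 1, 2, 3 };	/* no warning */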
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b52ed1ada0be..b08dee29ef5e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks;
96extern void calc_global_load_tick(struct rq *this_rq); 96extern void calc_global_load_tick(struct rq *this_rq);
97extern long calc_load_fold_active(struct rq *this_rq, long adjust); 97extern long calc_load_fold_active(struct rq *this_rq, long adjust);
98 98
99#ifdef CONFIG_SMP
100extern void cpu_load_update_active(struct rq *this_rq);
101#else
102static inline void cpu_load_update_active(struct rq *this_rq) { }
103#endif
104
105/* 99/*
106 * Helpers for converting nanosecond timing to jiffy resolution 100 * Helpers for converting nanosecond timing to jiffy resolution
107 */ 101 */
@@ -344,8 +338,10 @@ struct cfs_bandwidth {
344 u64 runtime_expires; 338 u64 runtime_expires;
345 int expires_seq; 339 int expires_seq;
346 340
347 short idle; 341 u8 idle;
348 short period_active; 342 u8 period_active;
343 u8 distribute_running;
344 u8 slack_started;
349 struct hrtimer period_timer; 345 struct hrtimer period_timer;
350 struct hrtimer slack_timer; 346 struct hrtimer slack_timer;
351 struct list_head throttled_cfs_rq; 347 struct list_head throttled_cfs_rq;
@@ -354,8 +350,6 @@ struct cfs_bandwidth {
354 int nr_periods; 350 int nr_periods;
355 int nr_throttled; 351 int nr_throttled;
356 u64 throttled_time; 352 u64 throttled_time;
357
358 bool distribute_running;
359#endif 353#endif
360}; 354};
361 355
@@ -818,8 +812,6 @@ struct rq {
818 unsigned int nr_preferred_running; 812 unsigned int nr_preferred_running;
819 unsigned int numa_migrate_on; 813 unsigned int numa_migrate_on;
820#endif 814#endif
821 #define CPU_LOAD_IDX_MAX 5
822 unsigned long cpu_load[CPU_LOAD_IDX_MAX];
823#ifdef CONFIG_NO_HZ_COMMON 815#ifdef CONFIG_NO_HZ_COMMON
824#ifdef CONFIG_SMP 816#ifdef CONFIG_SMP
825 unsigned long last_load_update_tick; 817 unsigned long last_load_update_tick;
@@ -830,8 +822,6 @@ struct rq {
830 atomic_t nohz_flags; 822 atomic_t nohz_flags;
831#endif /* CONFIG_NO_HZ_COMMON */ 823#endif /* CONFIG_NO_HZ_COMMON */
832 824
833 /* capture load from *all* tasks on this CPU: */
834 struct load_weight load;
835 unsigned long nr_load_updates; 825 unsigned long nr_load_updates;
836 u64 nr_switches; 826 u64 nr_switches;
837 827
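Two things meet in the cfs_bandwidth hunks: the idle and period_active flags shrink from short to u8 while the stray bool distribute_running moves up beside them, and the new slack_started flag joins the cluster. A throwaway userspace comparison of just that flag cluster; exact sizes depend on the ABI, and the struct's timers, lock and counters are left out:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the flag cluster only, not the full cfs_bandwidth. */
struct flags_old {
	short idle;
	short period_active;
	bool distribute_running;	/* previously sat after throttled_time */
};

struct flags_new {
	uint8_t idle;
	uint8_t period_active;
	uint8_t distribute_running;
	uint8_t slack_started;
};

int main(void)
{
	printf("old flag cluster: %zu bytes\n", sizeof(struct flags_old));
	printf("new flag cluster: %zu bytes\n", sizeof(struct flags_new));
	return 0;
}

On a typical build the old mix needs padding to keep the shorts aligned, while the four u8 flags pack into four adjacent bytes; the printout is the authoritative answer for any particular compiler.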
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f53f89df837d..63184cf0d0d7 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl,
1344 .imbalance_pct = 125, 1344 .imbalance_pct = 125,
1345 1345
1346 .cache_nice_tries = 0, 1346 .cache_nice_tries = 0,
1347 .busy_idx = 0,
1348 .idle_idx = 0,
1349 .newidle_idx = 0,
1350 .wake_idx = 0,
1351 .forkexec_idx = 0,
1352 1347
1353 .flags = 1*SD_LOAD_BALANCE 1348 .flags = 1*SD_LOAD_BALANCE
1354 | 1*SD_BALANCE_NEWIDLE 1349 | 1*SD_BALANCE_NEWIDLE
@@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl,
1400 } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { 1395 } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
1401 sd->imbalance_pct = 117; 1396 sd->imbalance_pct = 117;
1402 sd->cache_nice_tries = 1; 1397 sd->cache_nice_tries = 1;
1403 sd->busy_idx = 2;
1404 1398
1405#ifdef CONFIG_NUMA 1399#ifdef CONFIG_NUMA
1406 } else if (sd->flags & SD_NUMA) { 1400 } else if (sd->flags & SD_NUMA) {
1407 sd->cache_nice_tries = 2; 1401 sd->cache_nice_tries = 2;
1408 sd->busy_idx = 3;
1409 sd->idle_idx = 2;
1410 1402
1411 sd->flags &= ~SD_PREFER_SIBLING; 1403 sd->flags &= ~SD_PREFER_SIBLING;
1412 sd->flags |= SD_SERIALIZE; 1404 sd->flags |= SD_SERIALIZE;
@@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl,
1419#endif 1411#endif
1420 } else { 1412 } else {
1421 sd->cache_nice_tries = 1; 1413 sd->cache_nice_tries = 1;
1422 sd->busy_idx = 2;
1423 sd->idle_idx = 1;
1424 } 1414 }
1425 1415
1426 /* 1416 /*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f4ee1a3428ae..be9707f68024 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
782 */ 782 */
783 if (!ts->tick_stopped) { 783 if (!ts->tick_stopped) {
784 calc_load_nohz_start(); 784 calc_load_nohz_start();
785 cpu_load_update_nohz_start();
786 quiet_vmstat(); 785 quiet_vmstat();
787 786
788 ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 787 ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
829{ 828{
830 /* Update jiffies first */ 829 /* Update jiffies first */
831 tick_do_update_jiffies64(now); 830 tick_do_update_jiffies64(now);
832 cpu_load_update_nohz_stop();
833 831
834 /* 832 /*
835 * Clear the timer idle flag, so we avoid IPIs on remote queueing and 833 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 1e6db9cbe4dc..fa95139445b2 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -277,7 +277,7 @@ static void move_to_next_cpu(void)
 277 * of this thread, then stop migrating for the duration 277 * of this thread, then stop migrating for the duration
278 * of the current test. 278 * of the current test.
279 */ 279 */
280 if (!cpumask_equal(current_mask, &current->cpus_allowed)) 280 if (!cpumask_equal(current_mask, current->cpus_ptr))
281 goto disable; 281 goto disable;
282 282
283 get_online_cpus(); 283 get_online_cpus();
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 157d9e31f6c2..60ba93fc42ce 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
23 * Kernel threads bound to a single CPU can safely use 23 * Kernel threads bound to a single CPU can safely use
24 * smp_processor_id(): 24 * smp_processor_id():
25 */ 25 */
26 if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu))) 26 if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
27 goto out; 27 goto out;
28 28
29 /* 29 /*
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 1da597aa6141..1a72b7d95cdc 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -34,7 +34,7 @@ static void simple_thread_func(int cnt)
34 34
35 /* Silly tracepoints */ 35 /* Silly tracepoints */
36 trace_foo_bar("hello", cnt, array, random_strings[len], 36 trace_foo_bar("hello", cnt, array, random_strings[len],
37 &current->cpus_allowed); 37 current->cpus_ptr);
38 38
39 trace_foo_with_template_simple("HELLO", cnt); 39 trace_foo_with_template_simple("HELLO", cnt);
40 40
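The tracing, watchdog and sample call sites above make the same mechanical change as the scheduler hunks earlier: affinity readers now dereference current->cpus_ptr rather than taking the address of an embedded mask. As I read the series, the point of the indirection is that cpus_ptr normally points at the task's own mask storage but can be temporarily repointed without losing the user-requested affinity. A plain-C model of that shape, with cpu_set_t standing in for struct cpumask and the 'pinned' override purely illustrative:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

struct task_model {
	cpu_set_t cpus_mask;		/* the task's own affinity storage */
	const cpu_set_t *cpus_ptr;	/* what readers dereference now */
};

static void task_init(struct task_model *p)
{
	CPU_ZERO(&p->cpus_mask);
	CPU_SET(0, &p->cpus_mask);
	CPU_SET(1, &p->cpus_mask);
	p->cpus_ptr = &p->cpus_mask;	/* normal case: points at own mask */
}

int main(void)
{
	struct task_model p;
	cpu_set_t pinned;

	task_init(&p);
	printf("allowed on cpu1: %d\n", CPU_ISSET(1, p.cpus_ptr));

	/* Illustrative: temporarily pin to CPU 0 without touching cpus_mask. */
	CPU_ZERO(&pinned);
	CPU_SET(0, &pinned);
	p.cpus_ptr = &pinned;
	printf("allowed on cpu1 while pinned: %d\n", CPU_ISSET(1, p.cpus_ptr));
	printf("original affinity keeps cpu1: %d\n", CPU_ISSET(1, &p.cpus_mask));

	p.cpus_ptr = &p.cpus_mask;	/* restore */
	return 0;
}

Readers that always go through cpus_ptr, like every hunk in this patch, automatically see whichever mask is currently in effect.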