author		Peter Zijlstra <peterz@infradead.org>	2013-10-07 06:29:16 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-10-09 06:40:46 -0400
commit		ac66f5477239ebd3c4e2cbf2f591ef387aa09884 (patch)
tree		d340545fa9235f4369c2bd50852fa3a2b7e807f1 /kernel/sched
parent		1be0bd77c5dd7c903f46abf52f9a3650face3c1d (diff)
sched/numa: Introduce migrate_swap()
Use the new stop_two_cpus() to implement migrate_swap(), a function that
flips two tasks between their respective cpus.

I'm fairly sure there's a less crude way than employing the stop_two_cpus()
method, but everything I tried either got horribly fragile and/or complex.
So keep it simple for now.

The notable detail is how we 'migrate' tasks that aren't runnable anymore.
We'll make it appear like we migrated them before they went to sleep. The
sole difference is the previous cpu in the wakeup path, so we override this.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1381141781-10992-39-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
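[Editor's note] For orientation, a minimal sketch of how a caller might drive the new helper follows; numa_try_swap() and its retry policy are illustrative assumptions for this note, not part of the patch. Only migrate_swap() itself is introduced below.

	/*
	 * Hypothetical caller sketch (not part of this patch): trade places
	 * with a task on a remote node once placement code has picked one.
	 * migrate_swap() performs the exchange under stop_two_cpus(); the
	 * stopper callback returns -EAGAIN when either task migrated or
	 * changed affinity in the meantime, which is a benign race.
	 */
	static int numa_try_swap(struct task_struct *cur, struct task_struct *p)
	{
		int ret = migrate_swap(cur, p);

		if (ret == -EAGAIN)
			return 0;	/* raced with a migration; retry on a later pass */

		return ret;
	}

Note the double validation in the patch: migrate_swap() checks cpu activity and affinity optimistically before scheduling the stopper works, and migrate_swap_stop() re-checks the same conditions under both runqueue locks, since everything can change in between.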
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c      | 106
-rw-r--r--	kernel/sched/fair.c      |   3
-rw-r--r--	kernel/sched/idle_task.c |   2
-rw-r--r--	kernel/sched/rt.c        |   5
-rw-r--r--	kernel/sched/sched.h     |   4
-rw-r--r--	kernel/sched/stop_task.c |   2
6 files changed, 108 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9060a7f4e9ed..32a2b29c2610 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1013,6 +1013,102 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	__set_task_cpu(p, new_cpu);
 }
 
+static void __migrate_swap_task(struct task_struct *p, int cpu)
+{
+	if (p->on_rq) {
+		struct rq *src_rq, *dst_rq;
+
+		src_rq = task_rq(p);
+		dst_rq = cpu_rq(cpu);
+
+		deactivate_task(src_rq, p, 0);
+		set_task_cpu(p, cpu);
+		activate_task(dst_rq, p, 0);
+		check_preempt_curr(dst_rq, p, 0);
+	} else {
+		/*
+		 * Task isn't running anymore; make it appear like we migrated
+		 * it before it went to sleep. This means on wakeup we make the
+		 * previous cpu our target instead of where it really is.
+		 */
+		p->wake_cpu = cpu;
+	}
+}
+
+struct migration_swap_arg {
+	struct task_struct *src_task, *dst_task;
+	int src_cpu, dst_cpu;
+};
+
+static int migrate_swap_stop(void *data)
+{
+	struct migration_swap_arg *arg = data;
+	struct rq *src_rq, *dst_rq;
+	int ret = -EAGAIN;
+
+	src_rq = cpu_rq(arg->src_cpu);
+	dst_rq = cpu_rq(arg->dst_cpu);
+
+	double_rq_lock(src_rq, dst_rq);
+	if (task_cpu(arg->dst_task) != arg->dst_cpu)
+		goto unlock;
+
+	if (task_cpu(arg->src_task) != arg->src_cpu)
+		goto unlock;
+
+	if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task)))
+		goto unlock;
+
+	if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task)))
+		goto unlock;
+
+	__migrate_swap_task(arg->src_task, arg->dst_cpu);
+	__migrate_swap_task(arg->dst_task, arg->src_cpu);
+
+	ret = 0;
+
+unlock:
+	double_rq_unlock(src_rq, dst_rq);
+
+	return ret;
+}
+
+/*
+ * Cross migrate two tasks
+ */
+int migrate_swap(struct task_struct *cur, struct task_struct *p)
+{
+	struct migration_swap_arg arg;
+	int ret = -EINVAL;
+
+	get_online_cpus();
+
+	arg = (struct migration_swap_arg){
+		.src_task = cur,
+		.src_cpu = task_cpu(cur),
+		.dst_task = p,
+		.dst_cpu = task_cpu(p),
+	};
+
+	if (arg.src_cpu == arg.dst_cpu)
+		goto out;
+
+	if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
+		goto out;
+
+	if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task)))
+		goto out;
+
+	if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task)))
+		goto out;
+
+	ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg);
+
+out:
+	put_online_cpus();
+	return ret;
+}
+
 struct migration_arg {
 	struct task_struct *task;
 	int dest_cpu;
@@ -1232,9 +1328,9 @@ out:
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 {
-	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+	cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -1518,7 +1614,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	if (p->sched_class->task_waking)
 		p->sched_class->task_waking(p);
 
-	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
 		set_task_cpu(p, cpu);
@@ -1752,7 +1848,7 @@ void wake_up_new_task(struct task_struct *p)
 	 * - cpus_allowed can change in the fork path
 	 * - any previously selected cpu might disappear through hotplug
 	 */
-	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
+	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
 
 	/* Initialize new task's runnable average */
@@ -2080,7 +2176,7 @@ void sched_exec(void)
 	int dest_cpu;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
+	dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fbc0c84a8a04..b1e5061287ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3706,11 +3706,10 @@ done:
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index d8da01008d39..516c3d9ceea1 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -9,7 +9,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ceebfba0a1dd..e9304cdc26fe 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1169,13 +1169,10 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
-	int cpu;
-
-	cpu = task_cpu(p);
 
 	if (p->nr_cpus_allowed == 1)
 		goto out;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 66458c902d84..4dc92d016aef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -558,6 +558,7 @@ static inline u64 rq_clock_task(struct rq *rq)
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_task_to(struct task_struct *p, int cpu);
+extern int migrate_swap(struct task_struct *, struct task_struct *);
 static inline void task_numa_free(struct task_struct *p)
 {
 	kfree(p->numa_faults);
@@ -736,6 +737,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 */
 	smp_wmb();
 	task_thread_info(p)->cpu = cpu;
+	p->wake_cpu = cpu;
 #endif
 }
 
@@ -991,7 +993,7 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
 	void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index e08fbeeb54b9..47197de8abd9 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -11,7 +11,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }