|  |  |  |
|---|---|---|
| author | Steven Rostedt <srostedt@redhat.com> | 2010-05-14 09:29:52 -0400 |
| committer | Steven Rostedt <rostedt@goodmis.org> | 2010-05-14 09:29:52 -0400 |
| commit | 23e117fa44429cc054cb27d5621d64e4ced91e52 (patch) |  |
| tree | a4b9d0902b9c6f009b2c297515221c1b9bed3af8 /kernel/sched_fair.c |  |
| parent | 668eb65f092902eb7dd526af73d4a7f025a94612 (diff) |  |
| parent | a93d2f1744206827ccf416e2cdc5018aa503314e (diff) |  |
Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-4
Diffstat (limited to 'kernel/sched_fair.c')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/sched_fair.c | 350 |
1 file changed, 148 insertions, 202 deletions
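Most of the churn below comes from two changes pulled in through this merge: the per-entity schedstat fields move from `struct sched_entity` into a nested `statistics` member (so call sites change from `se->wait_max` to `se->statistics.wait_max`), and `active_load_balance()` becomes `active_load_balance_cpu_stop()`, run from the cpu-stop machinery instead of the per-cpu migration thread. As a reading aid, here is a minimal sketch of the schedstat layout the hunks below code against; the field names are the ones visible in this diff, and the real `struct sched_statistics` in include/linux/sched.h carries additional members:

```c
/*
 * Illustrative sketch only, not part of this diff: the schedstat fields
 * formerly embedded directly in struct sched_entity are grouped under a
 * nested 'statistics' struct.
 */
#include <linux/types.h>

struct sched_statistics {
	u64	wait_start, wait_max, wait_count, wait_sum;
	u64	sleep_start, sleep_max, sum_sleep_runtime;
	u64	block_start, block_max;
	u64	exec_max, slice_max;
	u64	iowait_sum, iowait_count;
	u64	nr_wakeups_affine, nr_wakeups_affine_attempts;
	u64	nr_failed_migrations_affine, nr_failed_migrations_running;
	u64	nr_failed_migrations_hot, nr_forced_migrations;
};

struct sched_entity {
	/* ... load, run_node, vruntime, sum_exec_runtime, ... (elided) */
#ifdef CONFIG_SCHEDSTATS
	struct sched_statistics statistics;
#endif
	/* ... */
};

/*
 * Accessors therefore change from, e.g.
 *	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 * to
 *	schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
 */
```

The same renaming accounts for all of the `se.statistics.*` references in the `schedstat_inc()`/`schedstat_set()` hunks that follow.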
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5a5ea2cd924f..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -35,8 +35,8 @@ | |||
| 35 | * (to see the precise effective timeslice length of your workload, | 35 | * (to see the precise effective timeslice length of your workload, |
| 36 | * run vmstat and monitor the context-switches (cs) field) | 36 | * run vmstat and monitor the context-switches (cs) field) |
| 37 | */ | 37 | */ |
| 38 | unsigned int sysctl_sched_latency = 5000000ULL; | 38 | unsigned int sysctl_sched_latency = 6000000ULL; |
| 39 | unsigned int normalized_sysctl_sched_latency = 5000000ULL; | 39 | unsigned int normalized_sysctl_sched_latency = 6000000ULL; |
| 40 | 40 | ||
| 41 | /* | 41 | /* |
| 42 | * The initial- and re-scaling of tunables is configurable | 42 | * The initial- and re-scaling of tunables is configurable |
| @@ -52,15 +52,15 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling | |||
| 52 | 52 | ||
| 53 | /* | 53 | /* |
| 54 | * Minimal preemption granularity for CPU-bound tasks: | 54 | * Minimal preemption granularity for CPU-bound tasks: |
| 55 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 55 | * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds) |
| 56 | */ | 56 | */ |
| 57 | unsigned int sysctl_sched_min_granularity = 1000000ULL; | 57 | unsigned int sysctl_sched_min_granularity = 2000000ULL; |
| 58 | unsigned int normalized_sysctl_sched_min_granularity = 1000000ULL; | 58 | unsigned int normalized_sysctl_sched_min_granularity = 2000000ULL; |
| 59 | 59 | ||
| 60 | /* | 60 | /* |
| 61 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity | 61 | * is kept at sysctl_sched_latency / sysctl_sched_min_granularity |
| 62 | */ | 62 | */ |
| 63 | static unsigned int sched_nr_latency = 5; | 63 | static unsigned int sched_nr_latency = 3; |
| 64 | 64 | ||
| 65 | /* | 65 | /* |
| 66 | * After fork, child runs first. If set to 0 (default) then | 66 | * After fork, child runs first. If set to 0 (default) then |
| @@ -505,7 +505,8 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 505 | { | 505 | { |
| 506 | unsigned long delta_exec_weighted; | 506 | unsigned long delta_exec_weighted; |
| 507 | 507 | ||
| 508 | schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max)); | 508 | schedstat_set(curr->statistics.exec_max, |
| 509 | max((u64)delta_exec, curr->statistics.exec_max)); | ||
| 509 | 510 | ||
| 510 | curr->sum_exec_runtime += delta_exec; | 511 | curr->sum_exec_runtime += delta_exec; |
| 511 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 512 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
| @@ -548,7 +549,7 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
| 548 | static inline void | 549 | static inline void |
| 549 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | 550 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 550 | { | 551 | { |
| 551 | schedstat_set(se->wait_start, rq_of(cfs_rq)->clock); | 552 | schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock); |
| 552 | } | 553 | } |
| 553 | 554 | ||
| 554 | /* | 555 | /* |
| @@ -567,18 +568,18 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 567 | static void | 568 | static void |
| 568 | update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) | 569 | update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 569 | { | 570 | { |
| 570 | schedstat_set(se->wait_max, max(se->wait_max, | 571 | schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max, |
| 571 | rq_of(cfs_rq)->clock - se->wait_start)); | 572 | rq_of(cfs_rq)->clock - se->statistics.wait_start)); |
| 572 | schedstat_set(se->wait_count, se->wait_count + 1); | 573 | schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1); |
| 573 | schedstat_set(se->wait_sum, se->wait_sum + | 574 | schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum + |
| 574 | rq_of(cfs_rq)->clock - se->wait_start); | 575 | rq_of(cfs_rq)->clock - se->statistics.wait_start); |
| 575 | #ifdef CONFIG_SCHEDSTATS | 576 | #ifdef CONFIG_SCHEDSTATS |
| 576 | if (entity_is_task(se)) { | 577 | if (entity_is_task(se)) { |
| 577 | trace_sched_stat_wait(task_of(se), | 578 | trace_sched_stat_wait(task_of(se), |
| 578 | rq_of(cfs_rq)->clock - se->wait_start); | 579 | rq_of(cfs_rq)->clock - se->statistics.wait_start); |
| 579 | } | 580 | } |
| 580 | #endif | 581 | #endif |
| 581 | schedstat_set(se->wait_start, 0); | 582 | schedstat_set(se->statistics.wait_start, 0); |
| 582 | } | 583 | } |
| 583 | 584 | ||
| 584 | static inline void | 585 | static inline void |
| @@ -657,39 +658,39 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 657 | if (entity_is_task(se)) | 658 | if (entity_is_task(se)) |
| 658 | tsk = task_of(se); | 659 | tsk = task_of(se); |
| 659 | 660 | ||
| 660 | if (se->sleep_start) { | 661 | if (se->statistics.sleep_start) { |
| 661 | u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; | 662 | u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start; |
| 662 | 663 | ||
| 663 | if ((s64)delta < 0) | 664 | if ((s64)delta < 0) |
| 664 | delta = 0; | 665 | delta = 0; |
| 665 | 666 | ||
| 666 | if (unlikely(delta > se->sleep_max)) | 667 | if (unlikely(delta > se->statistics.sleep_max)) |
| 667 | se->sleep_max = delta; | 668 | se->statistics.sleep_max = delta; |
| 668 | 669 | ||
| 669 | se->sleep_start = 0; | 670 | se->statistics.sleep_start = 0; |
| 670 | se->sum_sleep_runtime += delta; | 671 | se->statistics.sum_sleep_runtime += delta; |
| 671 | 672 | ||
| 672 | if (tsk) { | 673 | if (tsk) { |
| 673 | account_scheduler_latency(tsk, delta >> 10, 1); | 674 | account_scheduler_latency(tsk, delta >> 10, 1); |
| 674 | trace_sched_stat_sleep(tsk, delta); | 675 | trace_sched_stat_sleep(tsk, delta); |
| 675 | } | 676 | } |
| 676 | } | 677 | } |
| 677 | if (se->block_start) { | 678 | if (se->statistics.block_start) { |
| 678 | u64 delta = rq_of(cfs_rq)->clock - se->block_start; | 679 | u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start; |
| 679 | 680 | ||
| 680 | if ((s64)delta < 0) | 681 | if ((s64)delta < 0) |
| 681 | delta = 0; | 682 | delta = 0; |
| 682 | 683 | ||
| 683 | if (unlikely(delta > se->block_max)) | 684 | if (unlikely(delta > se->statistics.block_max)) |
| 684 | se->block_max = delta; | 685 | se->statistics.block_max = delta; |
| 685 | 686 | ||
| 686 | se->block_start = 0; | 687 | se->statistics.block_start = 0; |
| 687 | se->sum_sleep_runtime += delta; | 688 | se->statistics.sum_sleep_runtime += delta; |
| 688 | 689 | ||
| 689 | if (tsk) { | 690 | if (tsk) { |
| 690 | if (tsk->in_iowait) { | 691 | if (tsk->in_iowait) { |
| 691 | se->iowait_sum += delta; | 692 | se->statistics.iowait_sum += delta; |
| 692 | se->iowait_count++; | 693 | se->statistics.iowait_count++; |
| 693 | trace_sched_stat_iowait(tsk, delta); | 694 | trace_sched_stat_iowait(tsk, delta); |
| 694 | } | 695 | } |
| 695 | 696 | ||
| @@ -737,20 +738,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 737 | vruntime += sched_vslice(cfs_rq, se); | 738 | vruntime += sched_vslice(cfs_rq, se); |
| 738 | 739 | ||
| 739 | /* sleeps up to a single latency don't count. */ | 740 | /* sleeps up to a single latency don't count. */ |
| 740 | if (!initial && sched_feat(FAIR_SLEEPERS)) { | 741 | if (!initial) { |
| 741 | unsigned long thresh = sysctl_sched_latency; | 742 | unsigned long thresh = sysctl_sched_latency; |
| 742 | 743 | ||
| 743 | /* | 744 | /* |
| 744 | * Convert the sleeper threshold into virtual time. | ||
| 745 | * SCHED_IDLE is a special sub-class. We care about | ||
| 746 | * fairness only relative to other SCHED_IDLE tasks, | ||
| 747 | * all of which have the same weight. | ||
| 748 | */ | ||
| 749 | if (sched_feat(NORMALIZED_SLEEPER) && (!entity_is_task(se) || | ||
| 750 | task_of(se)->policy != SCHED_IDLE)) | ||
| 751 | thresh = calc_delta_fair(thresh, se); | ||
| 752 | |||
| 753 | /* | ||
| 754 | * Halve their sleep time's effect, to allow | 745 | * Halve their sleep time's effect, to allow |
| 755 | * for a gentler effect of sleepers: | 746 | * for a gentler effect of sleepers: |
| 756 | */ | 747 | */ |
| @@ -766,9 +757,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 766 | se->vruntime = vruntime; | 757 | se->vruntime = vruntime; |
| 767 | } | 758 | } |
| 768 | 759 | ||
| 769 | #define ENQUEUE_WAKEUP 1 | ||
| 770 | #define ENQUEUE_MIGRATE 2 | ||
| 771 | |||
| 772 | static void | 760 | static void |
| 773 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | 761 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
| 774 | { | 762 | { |
| @@ -776,7 +764,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
| 776 | * Update the normalized vruntime before updating min_vruntime | 764 | * Update the normalized vruntime before updating min_vruntime |
| 777 | * through callig update_curr(). | 765 | * through callig update_curr(). |
| 778 | */ | 766 | */ |
| 779 | if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) | 767 | if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING)) |
| 780 | se->vruntime += cfs_rq->min_vruntime; | 768 | se->vruntime += cfs_rq->min_vruntime; |
| 781 | 769 | ||
| 782 | /* | 770 | /* |
| @@ -812,7 +800,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 812 | } | 800 | } |
| 813 | 801 | ||
| 814 | static void | 802 | static void |
| 815 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | 803 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
| 816 | { | 804 | { |
| 817 | /* | 805 | /* |
| 818 | * Update run-time statistics of the 'current'. | 806 | * Update run-time statistics of the 'current'. |
| @@ -820,15 +808,15 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
| 820 | update_curr(cfs_rq); | 808 | update_curr(cfs_rq); |
| 821 | 809 | ||
| 822 | update_stats_dequeue(cfs_rq, se); | 810 | update_stats_dequeue(cfs_rq, se); |
| 823 | if (sleep) { | 811 | if (flags & DEQUEUE_SLEEP) { |
| 824 | #ifdef CONFIG_SCHEDSTATS | 812 | #ifdef CONFIG_SCHEDSTATS |
| 825 | if (entity_is_task(se)) { | 813 | if (entity_is_task(se)) { |
| 826 | struct task_struct *tsk = task_of(se); | 814 | struct task_struct *tsk = task_of(se); |
| 827 | 815 | ||
| 828 | if (tsk->state & TASK_INTERRUPTIBLE) | 816 | if (tsk->state & TASK_INTERRUPTIBLE) |
| 829 | se->sleep_start = rq_of(cfs_rq)->clock; | 817 | se->statistics.sleep_start = rq_of(cfs_rq)->clock; |
| 830 | if (tsk->state & TASK_UNINTERRUPTIBLE) | 818 | if (tsk->state & TASK_UNINTERRUPTIBLE) |
| 831 | se->block_start = rq_of(cfs_rq)->clock; | 819 | se->statistics.block_start = rq_of(cfs_rq)->clock; |
| 832 | } | 820 | } |
| 833 | #endif | 821 | #endif |
| 834 | } | 822 | } |
| @@ -845,7 +833,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
| 845 | * update can refer to the ->curr item and we need to reflect this | 833 | * update can refer to the ->curr item and we need to reflect this |
| 846 | * movement in our normalized position. | 834 | * movement in our normalized position. |
| 847 | */ | 835 | */ |
| 848 | if (!sleep) | 836 | if (!(flags & DEQUEUE_SLEEP)) |
| 849 | se->vruntime -= cfs_rq->min_vruntime; | 837 | se->vruntime -= cfs_rq->min_vruntime; |
| 850 | } | 838 | } |
| 851 | 839 | ||
| @@ -912,7 +900,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 912 | * when there are only lesser-weight tasks around): | 900 | * when there are only lesser-weight tasks around): |
| 913 | */ | 901 | */ |
| 914 | if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) { | 902 | if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) { |
| 915 | se->slice_max = max(se->slice_max, | 903 | se->statistics.slice_max = max(se->statistics.slice_max, |
| 916 | se->sum_exec_runtime - se->prev_sum_exec_runtime); | 904 | se->sum_exec_runtime - se->prev_sum_exec_runtime); |
| 917 | } | 905 | } |
| 918 | #endif | 906 | #endif |
| @@ -1054,16 +1042,10 @@ static inline void hrtick_update(struct rq *rq) | |||
| 1054 | * then put the task into the rbtree: | 1042 | * then put the task into the rbtree: |
| 1055 | */ | 1043 | */ |
| 1056 | static void | 1044 | static void |
| 1057 | enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head) | 1045 | enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) |
| 1058 | { | 1046 | { |
| 1059 | struct cfs_rq *cfs_rq; | 1047 | struct cfs_rq *cfs_rq; |
| 1060 | struct sched_entity *se = &p->se; | 1048 | struct sched_entity *se = &p->se; |
| 1061 | int flags = 0; | ||
| 1062 | |||
| 1063 | if (wakeup) | ||
| 1064 | flags |= ENQUEUE_WAKEUP; | ||
| 1065 | if (p->state == TASK_WAKING) | ||
| 1066 | flags |= ENQUEUE_MIGRATE; | ||
| 1067 | 1049 | ||
| 1068 | for_each_sched_entity(se) { | 1050 | for_each_sched_entity(se) { |
| 1069 | if (se->on_rq) | 1051 | if (se->on_rq) |
| @@ -1081,18 +1063,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head) | |||
| 1081 | * decreased. We remove the task from the rbtree and | 1063 | * decreased. We remove the task from the rbtree and |
| 1082 | * update the fair scheduling stats: | 1064 | * update the fair scheduling stats: |
| 1083 | */ | 1065 | */ |
| 1084 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | 1066 | static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) |
| 1085 | { | 1067 | { |
| 1086 | struct cfs_rq *cfs_rq; | 1068 | struct cfs_rq *cfs_rq; |
| 1087 | struct sched_entity *se = &p->se; | 1069 | struct sched_entity *se = &p->se; |
| 1088 | 1070 | ||
| 1089 | for_each_sched_entity(se) { | 1071 | for_each_sched_entity(se) { |
| 1090 | cfs_rq = cfs_rq_of(se); | 1072 | cfs_rq = cfs_rq_of(se); |
| 1091 | dequeue_entity(cfs_rq, se, sleep); | 1073 | dequeue_entity(cfs_rq, se, flags); |
| 1092 | /* Don't dequeue parent if it has other entities besides us */ | 1074 | /* Don't dequeue parent if it has other entities besides us */ |
| 1093 | if (cfs_rq->load.weight) | 1075 | if (cfs_rq->load.weight) |
| 1094 | break; | 1076 | break; |
| 1095 | sleep = 1; | 1077 | flags |= DEQUEUE_SLEEP; |
| 1096 | } | 1078 | } |
| 1097 | 1079 | ||
| 1098 | hrtick_update(rq); | 1080 | hrtick_update(rq); |
| @@ -1240,7 +1222,6 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
| 1240 | 1222 | ||
| 1241 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | 1223 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
| 1242 | { | 1224 | { |
| 1243 | struct task_struct *curr = current; | ||
| 1244 | unsigned long this_load, load; | 1225 | unsigned long this_load, load; |
| 1245 | int idx, this_cpu, prev_cpu; | 1226 | int idx, this_cpu, prev_cpu; |
| 1246 | unsigned long tl_per_task; | 1227 | unsigned long tl_per_task; |
| @@ -1255,18 +1236,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1255 | load = source_load(prev_cpu, idx); | 1236 | load = source_load(prev_cpu, idx); |
| 1256 | this_load = target_load(this_cpu, idx); | 1237 | this_load = target_load(this_cpu, idx); |
| 1257 | 1238 | ||
| 1258 | if (sync) { | ||
| 1259 | if (sched_feat(SYNC_LESS) && | ||
| 1260 | (curr->se.avg_overlap > sysctl_sched_migration_cost || | ||
| 1261 | p->se.avg_overlap > sysctl_sched_migration_cost)) | ||
| 1262 | sync = 0; | ||
| 1263 | } else { | ||
| 1264 | if (sched_feat(SYNC_MORE) && | ||
| 1265 | (curr->se.avg_overlap < sysctl_sched_migration_cost && | ||
| 1266 | p->se.avg_overlap < sysctl_sched_migration_cost)) | ||
| 1267 | sync = 1; | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | /* | 1239 | /* |
| 1271 | * If sync wakeup then subtract the (maximum possible) | 1240 | * If sync wakeup then subtract the (maximum possible) |
| 1272 | * effect of the currently running task from the load | 1241 | * effect of the currently running task from the load |
| @@ -1306,7 +1275,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1306 | if (sync && balanced) | 1275 | if (sync && balanced) |
| 1307 | return 1; | 1276 | return 1; |
| 1308 | 1277 | ||
| 1309 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1278 | schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts); |
| 1310 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1279 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
| 1311 | 1280 | ||
| 1312 | if (balanced || | 1281 | if (balanced || |
| @@ -1318,7 +1287,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
| 1318 | * there is no bad imbalance. | 1287 | * there is no bad imbalance. |
| 1319 | */ | 1288 | */ |
| 1320 | schedstat_inc(sd, ttwu_move_affine); | 1289 | schedstat_inc(sd, ttwu_move_affine); |
| 1321 | schedstat_inc(p, se.nr_wakeups_affine); | 1290 | schedstat_inc(p, se.statistics.nr_wakeups_affine); |
| 1322 | 1291 | ||
| 1323 | return 1; | 1292 | return 1; |
| 1324 | } | 1293 | } |
| @@ -1406,29 +1375,48 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
| 1406 | /* | 1375 | /* |
| 1407 | * Try and locate an idle CPU in the sched_domain. | 1376 | * Try and locate an idle CPU in the sched_domain. |
| 1408 | */ | 1377 | */ |
| 1409 | static int | 1378 | static int select_idle_sibling(struct task_struct *p, int target) |
| 1410 | select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) | ||
| 1411 | { | 1379 | { |
| 1412 | int cpu = smp_processor_id(); | 1380 | int cpu = smp_processor_id(); |
| 1413 | int prev_cpu = task_cpu(p); | 1381 | int prev_cpu = task_cpu(p); |
| 1382 | struct sched_domain *sd; | ||
| 1414 | int i; | 1383 | int i; |
| 1415 | 1384 | ||
| 1416 | /* | 1385 | /* |
| 1417 | * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE | 1386 | * If the task is going to be woken-up on this cpu and if it is |
| 1418 | * test in select_task_rq_fair) and the prev_cpu is idle then that's | 1387 | * already idle, then it is the right target. |
| 1419 | * always a better target than the current cpu. | ||
| 1420 | */ | 1388 | */ |
| 1421 | if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) | 1389 | if (target == cpu && idle_cpu(cpu)) |
| 1390 | return cpu; | ||
| 1391 | |||
| 1392 | /* | ||
| 1393 | * If the task is going to be woken-up on the cpu where it previously | ||
| 1394 | * ran and if it is currently idle, then it the right target. | ||
| 1395 | */ | ||
| 1396 | if (target == prev_cpu && idle_cpu(prev_cpu)) | ||
| 1422 | return prev_cpu; | 1397 | return prev_cpu; |
| 1423 | 1398 | ||
| 1424 | /* | 1399 | /* |
| 1425 | * Otherwise, iterate the domain and find an elegible idle cpu. | 1400 | * Otherwise, iterate the domains and find an elegible idle cpu. |
| 1426 | */ | 1401 | */ |
| 1427 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | 1402 | for_each_domain(target, sd) { |
| 1428 | if (!cpu_rq(i)->cfs.nr_running) { | 1403 | if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) |
| 1429 | target = i; | ||
| 1430 | break; | 1404 | break; |
| 1405 | |||
| 1406 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | ||
| 1407 | if (idle_cpu(i)) { | ||
| 1408 | target = i; | ||
| 1409 | break; | ||
| 1410 | } | ||
| 1431 | } | 1411 | } |
| 1412 | |||
| 1413 | /* | ||
| 1414 | * Lets stop looking for an idle sibling when we reached | ||
| 1415 | * the domain that spans the current cpu and prev_cpu. | ||
| 1416 | */ | ||
| 1417 | if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && | ||
| 1418 | cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) | ||
| 1419 | break; | ||
| 1432 | } | 1420 | } |
| 1433 | 1421 | ||
| 1434 | return target; | 1422 | return target; |
| @@ -1445,7 +1433,8 @@ select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) | |||
| 1445 | * | 1433 | * |
| 1446 | * preempt must be disabled. | 1434 | * preempt must be disabled. |
| 1447 | */ | 1435 | */ |
| 1448 | static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | 1436 | static int |
| 1437 | select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) | ||
| 1449 | { | 1438 | { |
| 1450 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | 1439 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; |
| 1451 | int cpu = smp_processor_id(); | 1440 | int cpu = smp_processor_id(); |
| @@ -1456,8 +1445,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1456 | int sync = wake_flags & WF_SYNC; | 1445 | int sync = wake_flags & WF_SYNC; |
| 1457 | 1446 | ||
| 1458 | if (sd_flag & SD_BALANCE_WAKE) { | 1447 | if (sd_flag & SD_BALANCE_WAKE) { |
| 1459 | if (sched_feat(AFFINE_WAKEUPS) && | 1448 | if (cpumask_test_cpu(cpu, &p->cpus_allowed)) |
| 1460 | cpumask_test_cpu(cpu, &p->cpus_allowed)) | ||
| 1461 | want_affine = 1; | 1449 | want_affine = 1; |
| 1462 | new_cpu = prev_cpu; | 1450 | new_cpu = prev_cpu; |
| 1463 | } | 1451 | } |
| @@ -1491,34 +1479,13 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1491 | } | 1479 | } |
| 1492 | 1480 | ||
| 1493 | /* | 1481 | /* |
| 1494 | * While iterating the domains looking for a spanning | 1482 | * If both cpu and prev_cpu are part of this domain, |
| 1495 | * WAKE_AFFINE domain, adjust the affine target to any idle cpu | 1483 | * cpu is a valid SD_WAKE_AFFINE target. |
| 1496 | * in cache sharing domains along the way. | ||
| 1497 | */ | 1484 | */ |
| 1498 | if (want_affine) { | 1485 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && |
| 1499 | int target = -1; | 1486 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { |
| 1500 | 1487 | affine_sd = tmp; | |
| 1501 | /* | 1488 | want_affine = 0; |
| 1502 | * If both cpu and prev_cpu are part of this domain, | ||
| 1503 | * cpu is a valid SD_WAKE_AFFINE target. | ||
| 1504 | */ | ||
| 1505 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) | ||
| 1506 | target = cpu; | ||
| 1507 | |||
| 1508 | /* | ||
| 1509 | * If there's an idle sibling in this domain, make that | ||
| 1510 | * the wake_affine target instead of the current cpu. | ||
| 1511 | */ | ||
| 1512 | if (tmp->flags & SD_SHARE_PKG_RESOURCES) | ||
| 1513 | target = select_idle_sibling(p, tmp, target); | ||
| 1514 | |||
| 1515 | if (target >= 0) { | ||
| 1516 | if (tmp->flags & SD_WAKE_AFFINE) { | ||
| 1517 | affine_sd = tmp; | ||
| 1518 | want_affine = 0; | ||
| 1519 | } | ||
| 1520 | cpu = target; | ||
| 1521 | } | ||
| 1522 | } | 1489 | } |
| 1523 | 1490 | ||
| 1524 | if (!want_sd && !want_affine) | 1491 | if (!want_sd && !want_affine) |
| @@ -1531,22 +1498,29 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1531 | sd = tmp; | 1498 | sd = tmp; |
| 1532 | } | 1499 | } |
| 1533 | 1500 | ||
| 1501 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1534 | if (sched_feat(LB_SHARES_UPDATE)) { | 1502 | if (sched_feat(LB_SHARES_UPDATE)) { |
| 1535 | /* | 1503 | /* |
| 1536 | * Pick the largest domain to update shares over | 1504 | * Pick the largest domain to update shares over |
| 1537 | */ | 1505 | */ |
| 1538 | tmp = sd; | 1506 | tmp = sd; |
| 1539 | if (affine_sd && (!tmp || | 1507 | if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) |
| 1540 | cpumask_weight(sched_domain_span(affine_sd)) > | ||
| 1541 | cpumask_weight(sched_domain_span(sd)))) | ||
| 1542 | tmp = affine_sd; | 1508 | tmp = affine_sd; |
| 1543 | 1509 | ||
| 1544 | if (tmp) | 1510 | if (tmp) { |
| 1511 | raw_spin_unlock(&rq->lock); | ||
| 1545 | update_shares(tmp); | 1512 | update_shares(tmp); |
| 1513 | raw_spin_lock(&rq->lock); | ||
| 1514 | } | ||
| 1546 | } | 1515 | } |
| 1516 | #endif | ||
| 1547 | 1517 | ||
| 1548 | if (affine_sd && wake_affine(affine_sd, p, sync)) | 1518 | if (affine_sd) { |
| 1549 | return cpu; | 1519 | if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) |
| 1520 | return select_idle_sibling(p, cpu); | ||
| 1521 | else | ||
| 1522 | return select_idle_sibling(p, prev_cpu); | ||
| 1523 | } | ||
| 1550 | 1524 | ||
| 1551 | while (sd) { | 1525 | while (sd) { |
| 1552 | int load_idx = sd->forkexec_idx; | 1526 | int load_idx = sd->forkexec_idx; |
| @@ -1576,10 +1550,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1576 | 1550 | ||
| 1577 | /* Now try balancing at a lower domain level of new_cpu */ | 1551 | /* Now try balancing at a lower domain level of new_cpu */ |
| 1578 | cpu = new_cpu; | 1552 | cpu = new_cpu; |
| 1579 | weight = cpumask_weight(sched_domain_span(sd)); | 1553 | weight = sd->span_weight; |
| 1580 | sd = NULL; | 1554 | sd = NULL; |
| 1581 | for_each_domain(cpu, tmp) { | 1555 | for_each_domain(cpu, tmp) { |
| 1582 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | 1556 | if (weight <= tmp->span_weight) |
| 1583 | break; | 1557 | break; |
| 1584 | if (tmp->flags & sd_flag) | 1558 | if (tmp->flags & sd_flag) |
| 1585 | sd = tmp; | 1559 | sd = tmp; |
| @@ -1591,63 +1565,26 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
| 1591 | } | 1565 | } |
| 1592 | #endif /* CONFIG_SMP */ | 1566 | #endif /* CONFIG_SMP */ |
| 1593 | 1567 | ||
| 1594 | /* | ||
| 1595 | * Adaptive granularity | ||
| 1596 | * | ||
| 1597 | * se->avg_wakeup gives the average time a task runs until it does a wakeup, | ||
| 1598 | * with the limit of wakeup_gran -- when it never does a wakeup. | ||
| 1599 | * | ||
| 1600 | * So the smaller avg_wakeup is the faster we want this task to preempt, | ||
| 1601 | * but we don't want to treat the preemptee unfairly and therefore allow it | ||
| 1602 | * to run for at least the amount of time we'd like to run. | ||
| 1603 | * | ||
| 1604 | * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one | ||
| 1605 | * | ||
| 1606 | * NOTE: we use *nr_running to scale with load, this nicely matches the | ||
| 1607 | * degrading latency on load. | ||
| 1608 | */ | ||
| 1609 | static unsigned long | ||
| 1610 | adaptive_gran(struct sched_entity *curr, struct sched_entity *se) | ||
| 1611 | { | ||
| 1612 | u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | ||
| 1613 | u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running; | ||
| 1614 | u64 gran = 0; | ||
| 1615 | |||
| 1616 | if (this_run < expected_wakeup) | ||
| 1617 | gran = expected_wakeup - this_run; | ||
| 1618 | |||
| 1619 | return min_t(s64, gran, sysctl_sched_wakeup_granularity); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | static unsigned long | 1568 | static unsigned long |
| 1623 | wakeup_gran(struct sched_entity *curr, struct sched_entity *se) | 1569 | wakeup_gran(struct sched_entity *curr, struct sched_entity *se) |
| 1624 | { | 1570 | { |
| 1625 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1571 | unsigned long gran = sysctl_sched_wakeup_granularity; |
| 1626 | 1572 | ||
| 1627 | if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN)) | ||
| 1628 | gran = adaptive_gran(curr, se); | ||
| 1629 | |||
| 1630 | /* | 1573 | /* |
| 1631 | * Since its curr running now, convert the gran from real-time | 1574 | * Since its curr running now, convert the gran from real-time |
| 1632 | * to virtual-time in his units. | 1575 | * to virtual-time in his units. |
| 1576 | * | ||
| 1577 | * By using 'se' instead of 'curr' we penalize light tasks, so | ||
| 1578 | * they get preempted easier. That is, if 'se' < 'curr' then | ||
| 1579 | * the resulting gran will be larger, therefore penalizing the | ||
| 1580 | * lighter, if otoh 'se' > 'curr' then the resulting gran will | ||
| 1581 | * be smaller, again penalizing the lighter task. | ||
| 1582 | * | ||
| 1583 | * This is especially important for buddies when the leftmost | ||
| 1584 | * task is higher priority than the buddy. | ||
| 1633 | */ | 1585 | */ |
| 1634 | if (sched_feat(ASYM_GRAN)) { | 1586 | if (unlikely(se->load.weight != NICE_0_LOAD)) |
| 1635 | /* | 1587 | gran = calc_delta_fair(gran, se); |
| 1636 | * By using 'se' instead of 'curr' we penalize light tasks, so | ||
| 1637 | * they get preempted easier. That is, if 'se' < 'curr' then | ||
| 1638 | * the resulting gran will be larger, therefore penalizing the | ||
| 1639 | * lighter, if otoh 'se' > 'curr' then the resulting gran will | ||
| 1640 | * be smaller, again penalizing the lighter task. | ||
| 1641 | * | ||
| 1642 | * This is especially important for buddies when the leftmost | ||
| 1643 | * task is higher priority than the buddy. | ||
| 1644 | */ | ||
| 1645 | if (unlikely(se->load.weight != NICE_0_LOAD)) | ||
| 1646 | gran = calc_delta_fair(gran, se); | ||
| 1647 | } else { | ||
| 1648 | if (unlikely(curr->load.weight != NICE_0_LOAD)) | ||
| 1649 | gran = calc_delta_fair(gran, curr); | ||
| 1650 | } | ||
| 1651 | 1588 | ||
| 1652 | return gran; | 1589 | return gran; |
| 1653 | } | 1590 | } |
| @@ -1705,7 +1642,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1705 | struct task_struct *curr = rq->curr; | 1642 | struct task_struct *curr = rq->curr; |
| 1706 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1643 | struct sched_entity *se = &curr->se, *pse = &p->se; |
| 1707 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1644 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
| 1708 | int sync = wake_flags & WF_SYNC; | ||
| 1709 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1645 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
| 1710 | 1646 | ||
| 1711 | if (unlikely(rt_prio(p->prio))) | 1647 | if (unlikely(rt_prio(p->prio))) |
| @@ -1738,14 +1674,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1738 | if (unlikely(curr->policy == SCHED_IDLE)) | 1674 | if (unlikely(curr->policy == SCHED_IDLE)) |
| 1739 | goto preempt; | 1675 | goto preempt; |
| 1740 | 1676 | ||
| 1741 | if (sched_feat(WAKEUP_SYNC) && sync) | ||
| 1742 | goto preempt; | ||
| 1743 | |||
| 1744 | if (sched_feat(WAKEUP_OVERLAP) && | ||
| 1745 | se->avg_overlap < sysctl_sched_migration_cost && | ||
| 1746 | pse->avg_overlap < sysctl_sched_migration_cost) | ||
| 1747 | goto preempt; | ||
| 1748 | |||
| 1749 | if (!sched_feat(WAKEUP_PREEMPT)) | 1677 | if (!sched_feat(WAKEUP_PREEMPT)) |
| 1750 | return; | 1678 | return; |
| 1751 | 1679 | ||
| @@ -1844,13 +1772,13 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 1844 | * 3) are cache-hot on their current CPU. | 1772 | * 3) are cache-hot on their current CPU. |
| 1845 | */ | 1773 | */ |
| 1846 | if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { | 1774 | if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { |
| 1847 | schedstat_inc(p, se.nr_failed_migrations_affine); | 1775 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); |
| 1848 | return 0; | 1776 | return 0; |
| 1849 | } | 1777 | } |
| 1850 | *all_pinned = 0; | 1778 | *all_pinned = 0; |
| 1851 | 1779 | ||
| 1852 | if (task_running(rq, p)) { | 1780 | if (task_running(rq, p)) { |
| 1853 | schedstat_inc(p, se.nr_failed_migrations_running); | 1781 | schedstat_inc(p, se.statistics.nr_failed_migrations_running); |
| 1854 | return 0; | 1782 | return 0; |
| 1855 | } | 1783 | } |
| 1856 | 1784 | ||
| @@ -1866,14 +1794,14 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 1866 | #ifdef CONFIG_SCHEDSTATS | 1794 | #ifdef CONFIG_SCHEDSTATS |
| 1867 | if (tsk_cache_hot) { | 1795 | if (tsk_cache_hot) { |
| 1868 | schedstat_inc(sd, lb_hot_gained[idle]); | 1796 | schedstat_inc(sd, lb_hot_gained[idle]); |
| 1869 | schedstat_inc(p, se.nr_forced_migrations); | 1797 | schedstat_inc(p, se.statistics.nr_forced_migrations); |
| 1870 | } | 1798 | } |
| 1871 | #endif | 1799 | #endif |
| 1872 | return 1; | 1800 | return 1; |
| 1873 | } | 1801 | } |
| 1874 | 1802 | ||
| 1875 | if (tsk_cache_hot) { | 1803 | if (tsk_cache_hot) { |
| 1876 | schedstat_inc(p, se.nr_failed_migrations_hot); | 1804 | schedstat_inc(p, se.statistics.nr_failed_migrations_hot); |
| 1877 | return 0; | 1805 | return 0; |
| 1878 | } | 1806 | } |
| 1879 | return 1; | 1807 | return 1; |
| @@ -2311,7 +2239,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | |||
| 2311 | 2239 | ||
| 2312 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) | 2240 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) |
| 2313 | { | 2241 | { |
| 2314 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 2242 | unsigned long weight = sd->span_weight; |
| 2315 | unsigned long smt_gain = sd->smt_gain; | 2243 | unsigned long smt_gain = sd->smt_gain; |
| 2316 | 2244 | ||
| 2317 | smt_gain /= weight; | 2245 | smt_gain /= weight; |
| @@ -2344,7 +2272,7 @@ unsigned long scale_rt_power(int cpu) | |||
| 2344 | 2272 | ||
| 2345 | static void update_cpu_power(struct sched_domain *sd, int cpu) | 2273 | static void update_cpu_power(struct sched_domain *sd, int cpu) |
| 2346 | { | 2274 | { |
| 2347 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 2275 | unsigned long weight = sd->span_weight; |
| 2348 | unsigned long power = SCHED_LOAD_SCALE; | 2276 | unsigned long power = SCHED_LOAD_SCALE; |
| 2349 | struct sched_group *sdg = sd->groups; | 2277 | struct sched_group *sdg = sd->groups; |
| 2350 | 2278 | ||
| @@ -2870,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle) | |||
| 2870 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); | 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); |
| 2871 | } | 2799 | } |
| 2872 | 2800 | ||
| 2801 | static int active_load_balance_cpu_stop(void *data); | ||
| 2802 | |||
| 2873 | /* | 2803 | /* |
| 2874 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2804 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
| 2875 | * tasks if there is an imbalance. | 2805 | * tasks if there is an imbalance. |
| @@ -2959,8 +2889,9 @@ redo: | |||
| 2959 | if (need_active_balance(sd, sd_idle, idle)) { | 2889 | if (need_active_balance(sd, sd_idle, idle)) { |
| 2960 | raw_spin_lock_irqsave(&busiest->lock, flags); | 2890 | raw_spin_lock_irqsave(&busiest->lock, flags); |
| 2961 | 2891 | ||
| 2962 | /* don't kick the migration_thread, if the curr | 2892 | /* don't kick the active_load_balance_cpu_stop, |
| 2963 | * task on busiest cpu can't be moved to this_cpu | 2893 | * if the curr task on busiest cpu can't be |
| 2894 | * moved to this_cpu | ||
| 2964 | */ | 2895 | */ |
| 2965 | if (!cpumask_test_cpu(this_cpu, | 2896 | if (!cpumask_test_cpu(this_cpu, |
| 2966 | &busiest->curr->cpus_allowed)) { | 2897 | &busiest->curr->cpus_allowed)) { |
| @@ -2970,14 +2901,22 @@ redo: | |||
| 2970 | goto out_one_pinned; | 2901 | goto out_one_pinned; |
| 2971 | } | 2902 | } |
| 2972 | 2903 | ||
| 2904 | /* | ||
| 2905 | * ->active_balance synchronizes accesses to | ||
| 2906 | * ->active_balance_work. Once set, it's cleared | ||
| 2907 | * only after active load balance is finished. | ||
| 2908 | */ | ||
| 2973 | if (!busiest->active_balance) { | 2909 | if (!busiest->active_balance) { |
| 2974 | busiest->active_balance = 1; | 2910 | busiest->active_balance = 1; |
| 2975 | busiest->push_cpu = this_cpu; | 2911 | busiest->push_cpu = this_cpu; |
| 2976 | active_balance = 1; | 2912 | active_balance = 1; |
| 2977 | } | 2913 | } |
| 2978 | raw_spin_unlock_irqrestore(&busiest->lock, flags); | 2914 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
| 2915 | |||
| 2979 | if (active_balance) | 2916 | if (active_balance) |
| 2980 | wake_up_process(busiest->migration_thread); | 2917 | stop_one_cpu_nowait(cpu_of(busiest), |
| 2918 | active_load_balance_cpu_stop, busiest, | ||
| 2919 | &busiest->active_balance_work); | ||
| 2981 | 2920 | ||
| 2982 | /* | 2921 | /* |
| 2983 | * We've kicked active balancing, reset the failure | 2922 | * We've kicked active balancing, reset the failure |
| @@ -3084,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3084 | } | 3023 | } |
| 3085 | 3024 | ||
| 3086 | /* | 3025 | /* |
| 3087 | * active_load_balance is run by migration threads. It pushes running tasks | 3026 | * active_load_balance_cpu_stop is run by cpu stopper. It pushes |
| 3088 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 3027 | * running tasks off the busiest CPU onto idle CPUs. It requires at |
| 3089 | * running on each physical CPU where possible, and avoids physical / | 3028 | * least 1 task to be running on each physical CPU where possible, and |
| 3090 | * logical imbalances. | 3029 | * avoids physical / logical imbalances. |
| 3091 | * | ||
| 3092 | * Called with busiest_rq locked. | ||
| 3093 | */ | 3030 | */ |
| 3094 | static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | 3031 | static int active_load_balance_cpu_stop(void *data) |
| 3095 | { | 3032 | { |
| 3033 | struct rq *busiest_rq = data; | ||
| 3034 | int busiest_cpu = cpu_of(busiest_rq); | ||
| 3096 | int target_cpu = busiest_rq->push_cpu; | 3035 | int target_cpu = busiest_rq->push_cpu; |
| 3036 | struct rq *target_rq = cpu_rq(target_cpu); | ||
| 3097 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
| 3098 | struct rq *target_rq; | 3038 | |
| 3039 | raw_spin_lock_irq(&busiest_rq->lock); | ||
| 3040 | |||
| 3041 | /* make sure the requested cpu hasn't gone down in the meantime */ | ||
| 3042 | if (unlikely(busiest_cpu != smp_processor_id() || | ||
| 3043 | !busiest_rq->active_balance)) | ||
| 3044 | goto out_unlock; | ||
| 3099 | 3045 | ||
| 3100 | /* Is there any task to move? */ | 3046 | /* Is there any task to move? */ |
| 3101 | if (busiest_rq->nr_running <= 1) | 3047 | if (busiest_rq->nr_running <= 1) |
| 3102 | return; | 3048 | goto out_unlock; |
| 3103 | |||
| 3104 | target_rq = cpu_rq(target_cpu); | ||
| 3105 | 3049 | ||
| 3106 | /* | 3050 | /* |
| 3107 | * This condition is "impossible", if it occurs | 3051 | * This condition is "impossible", if it occurs |
| @@ -3112,8 +3056,6 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3112 | 3056 | ||
| 3113 | /* move a task from busiest_rq to target_rq */ | 3057 | /* move a task from busiest_rq to target_rq */ |
| 3114 | double_lock_balance(busiest_rq, target_rq); | 3058 | double_lock_balance(busiest_rq, target_rq); |
| 3115 | update_rq_clock(busiest_rq); | ||
| 3116 | update_rq_clock(target_rq); | ||
| 3117 | 3059 | ||
| 3118 | /* Search for an sd spanning us and the target CPU. */ | 3060 | /* Search for an sd spanning us and the target CPU. */ |
| 3119 | for_each_domain(target_cpu, sd) { | 3061 | for_each_domain(target_cpu, sd) { |
| @@ -3132,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3132 | schedstat_inc(sd, alb_failed); | 3074 | schedstat_inc(sd, alb_failed); |
| 3133 | } | 3075 | } |
| 3134 | double_unlock_balance(busiest_rq, target_rq); | 3076 | double_unlock_balance(busiest_rq, target_rq); |
| 3077 | out_unlock: | ||
| 3078 | busiest_rq->active_balance = 0; | ||
| 3079 | raw_spin_unlock_irq(&busiest_rq->lock); | ||
| 3080 | return 0; | ||
| 3135 | } | 3081 | } |
| 3136 | 3082 | ||
| 3137 | #ifdef CONFIG_NO_HZ | 3083 | #ifdef CONFIG_NO_HZ |
