diff options
| -rw-r--r-- | kernel/sched/core.c | 68 |
1 files changed, 61 insertions, 7 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 96a4267e6020..9fd37169b302 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -2089,11 +2089,24 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2089 | p->sched_contributes_to_load = !!task_contributes_to_load(p); | 2089 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
| 2090 | p->state = TASK_WAKING; | 2090 | p->state = TASK_WAKING; |
| 2091 | 2091 | ||
| 2092 | if (p->in_iowait) { | ||
| 2093 | delayacct_blkio_end(); | ||
| 2094 | atomic_dec(&task_rq(p)->nr_iowait); | ||
| 2095 | } | ||
| 2096 | |||
| 2092 | cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); | 2097 | cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); |
| 2093 | if (task_cpu(p) != cpu) { | 2098 | if (task_cpu(p) != cpu) { |
| 2094 | wake_flags |= WF_MIGRATED; | 2099 | wake_flags |= WF_MIGRATED; |
| 2095 | set_task_cpu(p, cpu); | 2100 | set_task_cpu(p, cpu); |
| 2096 | } | 2101 | } |
| 2102 | |||
| 2103 | #else /* CONFIG_SMP */ | ||
| 2104 | |||
| 2105 | if (p->in_iowait) { | ||
| 2106 | delayacct_blkio_end(); | ||
| 2107 | atomic_dec(&task_rq(p)->nr_iowait); | ||
| 2108 | } | ||
| 2109 | |||
| 2097 | #endif /* CONFIG_SMP */ | 2110 | #endif /* CONFIG_SMP */ |
| 2098 | 2111 | ||
| 2099 | ttwu_queue(p, cpu, wake_flags); | 2112 | ttwu_queue(p, cpu, wake_flags); |
| @@ -2143,8 +2156,13 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) | |||
| 2143 | 2156 | ||
| 2144 | trace_sched_waking(p); | 2157 | trace_sched_waking(p); |
| 2145 | 2158 | ||
| 2146 | if (!task_on_rq_queued(p)) | 2159 | if (!task_on_rq_queued(p)) { |
| 2160 | if (p->in_iowait) { | ||
| 2161 | delayacct_blkio_end(); | ||
| 2162 | atomic_dec(&rq->nr_iowait); | ||
| 2163 | } | ||
| 2147 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); | 2164 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
| 2165 | } | ||
| 2148 | 2166 | ||
| 2149 | ttwu_do_wakeup(rq, p, 0, rf); | 2167 | ttwu_do_wakeup(rq, p, 0, rf); |
| 2150 | ttwu_stat(p, smp_processor_id(), 0); | 2168 | ttwu_stat(p, smp_processor_id(), 0); |
| @@ -2956,6 +2974,36 @@ unsigned long long nr_context_switches(void) | |||
| 2956 | return sum; | 2974 | return sum; |
| 2957 | } | 2975 | } |
| 2958 | 2976 | ||
| 2977 | /* | ||
| 2978 | * IO-wait accounting, and how it's mostly bollocks (on SMP). | ||
| 2979 | * | ||
| 2980 | * The idea behind IO-wait accounting is to account the idle time that we could | ||
| 2981 | * have spent running if it were not for IO. That is, if we were to improve the | ||
| 2982 | * storage performance, we'd have a proportional reduction in IO-wait time. | ||
| 2983 | * | ||
| 2984 | * This all works nicely on UP, where, when a task blocks on IO, we account | ||
| 2985 | * idle time as IO-wait, because if the storage were faster, it could've been | ||
| 2986 | * running and we'd not be idle. | ||
| 2987 | * | ||
| 2988 | * This has been extended to SMP, by doing the same for each CPU. This however | ||
| 2989 | * is broken. | ||
| 2990 | * | ||
| 2991 | * Imagine for instance the case where two tasks block on one CPU, only the one | ||
| 2992 | * CPU will have IO-wait accounted, while the other has regular idle. Even | ||
| 2993 | * though, if the storage were faster, both could've run at the same time, | ||
| 2994 | * utilising both CPUs. | ||
| 2995 | * | ||
| 2996 | * This means, that when looking globally, the current IO-wait accounting on | ||
| 2997 | * SMP is a lower bound, by reason of under accounting. | ||
| 2998 | * | ||
| 2999 | * Worse, since the numbers are provided per CPU, they are sometimes | ||
| 3000 | * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly | ||
| 3001 | * associated with any one particular CPU, it can wake to another CPU than it | ||
| 3002 | * blocked on. This means the per CPU IO-wait number is meaningless. | ||
| 3003 | * | ||
| 3004 | * Task CPU affinities can make all that even more 'interesting'. | ||
| 3005 | */ | ||
| 3006 | |||
| 2959 | unsigned long nr_iowait(void) | 3007 | unsigned long nr_iowait(void) |
| 2960 | { | 3008 | { |
| 2961 | unsigned long i, sum = 0; | 3009 | unsigned long i, sum = 0; |
| @@ -2966,6 +3014,13 @@ unsigned long nr_iowait(void) | |||
| 2966 | return sum; | 3014 | return sum; |
| 2967 | } | 3015 | } |
| 2968 | 3016 | ||
| 3017 | /* | ||
| 3018 | * Consumers of these two interfaces, like for example the cpufreq menu | ||
| 3019 | * governor, are using nonsensical data. Boosting frequency for a CPU that has | ||
| 3020 | * IO-wait which might not even end up running the task when it does become | ||
| 3021 | * runnable. | ||
| 3022 | */ | ||
| 3023 | |||
| 2969 | unsigned long nr_iowait_cpu(int cpu) | 3024 | unsigned long nr_iowait_cpu(int cpu) |
| 2970 | { | 3025 | { |
| 2971 | struct rq *this = cpu_rq(cpu); | 3026 | struct rq *this = cpu_rq(cpu); |
| @@ -3377,6 +3432,11 @@ static void __sched notrace __schedule(bool preempt) | |||
| 3377 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | 3432 | deactivate_task(rq, prev, DEQUEUE_SLEEP); |
| 3378 | prev->on_rq = 0; | 3433 | prev->on_rq = 0; |
| 3379 | 3434 | ||
| 3435 | if (prev->in_iowait) { | ||
| 3436 | atomic_inc(&rq->nr_iowait); | ||
| 3437 | delayacct_blkio_start(); | ||
| 3438 | } | ||
| 3439 | |||
| 3380 | /* | 3440 | /* |
| 3381 | * If a worker went to sleep, notify and ask workqueue | 3441 | * If a worker went to sleep, notify and ask workqueue |
| 3382 | * whether it wants to wake up a task to maintain | 3442 | * whether it wants to wake up a task to maintain |
| @@ -5075,19 +5135,13 @@ EXPORT_SYMBOL_GPL(yield_to); | |||
| 5075 | long __sched io_schedule_timeout(long timeout) | 5135 | long __sched io_schedule_timeout(long timeout) |
| 5076 | { | 5136 | { |
| 5077 | int old_iowait = current->in_iowait; | 5137 | int old_iowait = current->in_iowait; |
| 5078 | struct rq *rq; | ||
| 5079 | long ret; | 5138 | long ret; |
| 5080 | 5139 | ||
| 5081 | current->in_iowait = 1; | 5140 | current->in_iowait = 1; |
| 5082 | blk_schedule_flush_plug(current); | 5141 | blk_schedule_flush_plug(current); |
| 5083 | 5142 | ||
| 5084 | delayacct_blkio_start(); | ||
| 5085 | rq = raw_rq(); | ||
| 5086 | atomic_inc(&rq->nr_iowait); | ||
| 5087 | ret = schedule_timeout(timeout); | 5143 | ret = schedule_timeout(timeout); |
| 5088 | current->in_iowait = old_iowait; | 5144 | current->in_iowait = old_iowait; |
| 5089 | atomic_dec(&rq->nr_iowait); | ||
| 5090 | delayacct_blkio_end(); | ||
| 5091 | 5145 | ||
| 5092 | return ret; | 5146 | return ret; |
| 5093 | } | 5147 | } |
