Diffstat (limited to 'kernel')
-rw-r--r--   kernel/relay.c             |   5
-rw-r--r--   kernel/sched.c             |  11
-rw-r--r--   kernel/sched_debug.c       |   1
-rw-r--r--   kernel/sched_fair.c        | 191
-rw-r--r--   kernel/time/clocksource.c  |  14

5 files changed, 136 insertions, 86 deletions
diff --git a/kernel/relay.c b/kernel/relay.c
index d080b9d161a7..4c035a8a248c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1066,7 +1066,7 @@ static int subbuf_splice_actor(struct file *in,
                               unsigned int flags,
                               int *nonpad_ret)
 {
-        unsigned int pidx, poff, total_len, subbuf_pages, ret;
+        unsigned int pidx, poff, total_len, subbuf_pages, nr_pages, ret;
         struct rchan_buf *rbuf = in->private_data;
         unsigned int subbuf_size = rbuf->chan->subbuf_size;
         uint64_t pos = (uint64_t) *ppos;
@@ -1097,8 +1097,9 @@ static int subbuf_splice_actor(struct file *in,
         subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
         pidx = (read_start / PAGE_SIZE) % subbuf_pages;
         poff = read_start & ~PAGE_MASK;
+        nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS);

-        for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) {
+        for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
                 unsigned int this_len, this_end, private;
                 unsigned int cur_pos = read_start + total_len;

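The relay.c change caps the splice fill loop at PIPE_BUFFERS pages, since the page array handed to the splice descriptor is fixed-size and a sub-buffer may span more pages than one pipe can take. A minimal user-space sketch of the same clamp-to-capacity pattern (the constant and the main() driver below are illustrative only, not the kernel code):

/* clamp_demo.c - bound a fill loop by the capacity of a fixed array,
 * mirroring nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS). */
#include <stdio.h>

#define PIPE_BUFFERS 16                         /* capacity of the page array */

static unsigned int min_uint(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

int main(void)
{
        void *pages[PIPE_BUFFERS];
        unsigned int subbuf_pages = 64;         /* hypothetical sub-buffer size in pages */
        unsigned int nr_pages = min_uint(subbuf_pages, PIPE_BUFFERS);

        for (unsigned int i = 0; i < nr_pages; i++)     /* never walks past pages[] */
                pages[i] = NULL;

        printf("spliced %u of %u sub-buffer pages\n", nr_pages, subbuf_pages);
        return 0;
}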
diff --git a/kernel/sched.c b/kernel/sched.c
index d1ad69b270ca..3f7c5eb254e2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1396,6 +1396,12 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 {
         s64 delta;

+        /*
+         * Buddy candidates are cache hot:
+         */
+        if (&p->se == cfs_rq_of(&p->se)->next)
+                return 1;
+
         if (p->sched_class != &fair_sched_class)
                 return 0;

@@ -1855,10 +1861,11 @@ out_activate:
         schedstat_inc(p, se.nr_wakeups_remote);
         update_rq_clock(rq);
         activate_task(rq, p, 1);
-        check_preempt_curr(rq, p);
         success = 1;

 out_running:
+        check_preempt_curr(rq, p);
+
         p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
         if (p->sched_class->task_wake_up)
@@ -1892,6 +1899,8 @@ static void __sched_fork(struct task_struct *p)
         p->se.exec_start = 0;
         p->se.sum_exec_runtime = 0;
         p->se.prev_sum_exec_runtime = 0;
+        p->se.last_wakeup = 0;
+        p->se.avg_overlap = 0;

 #ifdef CONFIG_SCHEDSTATS
         p->se.wait_start = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4b5e24cf2f4a..ef358ba07683 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -288,6 +288,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
         PN(se.exec_start);
         PN(se.vruntime);
         PN(se.sum_exec_runtime);
+        PN(se.avg_overlap);

         nr_switches = p->nvcsw + p->nivcsw;

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc59080efa..b85cac4b5e25 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;

 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 5000000UL;

 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;

@@ -556,6 +556,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
         account_entity_enqueue(cfs_rq, se);
 }

+static void update_avg(u64 *avg, u64 sample)
+{
+        s64 diff = sample - *avg;
+        *avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+        if (!se->last_wakeup)
+                return;
+
+        update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+        se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -566,6 +581,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)

         update_stats_dequeue(cfs_rq, se);
         if (sleep) {
+                update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
                 if (entity_is_task(se)) {
                         struct task_struct *tsk = task_of(se);
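update_avg() above keeps se.avg_overlap as an exponential moving average with weight 1/8: each new sample (the runtime between waking a partner task and going back to sleep, recorded via last_wakeup) pulls the average an eighth of the way toward itself. A stand-alone sketch of just that arithmetic (the sample values are invented for illustration):

/* avg_overlap_demo.c - the 1/8-weight moving average used for se.avg_overlap */
#include <stdio.h>
#include <stdint.h>

static void update_avg(uint64_t *avg, uint64_t sample)
{
        int64_t diff = sample - *avg;

        *avg += diff >> 3;              /* avg += (sample - avg) / 8 */
}

int main(void)
{
        uint64_t avg_overlap = 0;
        uint64_t samples[] = { 800000, 200000, 250000, 240000 };  /* ns, invented */

        for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                update_avg(&avg_overlap, samples[i]);
                printf("sample %8llu ns -> avg_overlap %8llu ns\n",
                       (unsigned long long)samples[i],
                       (unsigned long long)avg_overlap);
        }
        return 0;
}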
@@ -980,96 +996,121 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 #endif

 #ifdef CONFIG_SMP
-static int select_task_rq_fair(struct task_struct *p, int sync)
+
+static const struct sched_class fair_sched_class;
+
+static int
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+            struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+            int idx, unsigned long load, unsigned long this_load,
+            unsigned int imbalance)
 {
-        int cpu, this_cpu;
-        struct rq *rq;
-        struct sched_domain *sd, *this_sd = NULL;
-        int new_cpu;
+        struct task_struct *curr = this_rq->curr;
+        unsigned long tl = this_load;
+        unsigned long tl_per_task;
+
+        if (!(this_sd->flags & SD_WAKE_AFFINE))
+                return 0;
+
+        /*
+         * If the currently running task will sleep within
+         * a reasonable amount of time then attract this newly
+         * woken task:
+         */
+        if (sync && curr->sched_class == &fair_sched_class) {
+                if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+                    p->se.avg_overlap < sysctl_sched_migration_cost)
+                        return 1;
+        }

-        cpu = task_cpu(p);
-        rq = task_rq(p);
-        this_cpu = smp_processor_id();
-        new_cpu = cpu;
+        schedstat_inc(p, se.nr_wakeups_affine_attempts);
+        tl_per_task = cpu_avg_load_per_task(this_cpu);

-        if (cpu == this_cpu)
-                goto out_set_cpu;
+        /*
+         * If sync wakeup then subtract the (maximum possible)
+         * effect of the currently running task from the load
+         * of the current CPU:
+         */
+        if (sync)
+                tl -= current->se.load.weight;
+
+        if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
+                        100*(tl + p->se.load.weight) <= imbalance*load) {
+                /*
+                 * This domain has SD_WAKE_AFFINE and
+                 * p is cache cold in this domain, and
+                 * there is no bad imbalance.
+                 */
+                schedstat_inc(this_sd, ttwu_move_affine);
+                schedstat_inc(p, se.nr_wakeups_affine);

+                return 1;
+        }
+        return 0;
+}
+
+static int select_task_rq_fair(struct task_struct *p, int sync)
+{
+        struct sched_domain *sd, *this_sd = NULL;
+        int prev_cpu, this_cpu, new_cpu;
+        unsigned long load, this_load;
+        struct rq *rq, *this_rq;
+        unsigned int imbalance;
+        int idx;
+
+        prev_cpu = task_cpu(p);
+        rq = task_rq(p);
+        this_cpu = smp_processor_id();
+        this_rq = cpu_rq(this_cpu);
+        new_cpu = prev_cpu;
+
+        /*
+         * 'this_sd' is the first domain that both
+         * this_cpu and prev_cpu are present in:
+         */
         for_each_domain(this_cpu, sd) {
-                if (cpu_isset(cpu, sd->span)) {
+                if (cpu_isset(prev_cpu, sd->span)) {
                         this_sd = sd;
                         break;
                 }
         }

         if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
-                goto out_set_cpu;
+                goto out;

         /*
          * Check for affine wakeup and passive balancing possibilities.
          */
-        if (this_sd) {
-                int idx = this_sd->wake_idx;
-                unsigned int imbalance;
-                unsigned long load, this_load;
-
-                imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-                load = source_load(cpu, idx);
-                this_load = target_load(this_cpu, idx);
-
-                new_cpu = this_cpu; /* Wake to this CPU if we can */
-
-                if (this_sd->flags & SD_WAKE_AFFINE) {
-                        unsigned long tl = this_load;
-                        unsigned long tl_per_task;
-
-                        /*
-                         * Attract cache-cold tasks on sync wakeups:
-                         */
-                        if (sync && !task_hot(p, rq->clock, this_sd))
-                                goto out_set_cpu;
-
-                        schedstat_inc(p, se.nr_wakeups_affine_attempts);
-                        tl_per_task = cpu_avg_load_per_task(this_cpu);
-
-                        /*
-                         * If sync wakeup then subtract the (maximum possible)
-                         * effect of the currently running task from the load
-                         * of the current CPU:
-                         */
-                        if (sync)
-                                tl -= current->se.load.weight;
-
-                        if ((tl <= load &&
-                                tl + target_load(cpu, idx) <= tl_per_task) ||
-                               100*(tl + p->se.load.weight) <= imbalance*load) {
-                                /*
-                                 * This domain has SD_WAKE_AFFINE and
-                                 * p is cache cold in this domain, and
-                                 * there is no bad imbalance.
-                                 */
-                                schedstat_inc(this_sd, ttwu_move_affine);
-                                schedstat_inc(p, se.nr_wakeups_affine);
-                                goto out_set_cpu;
-                        }
-                }
+        if (!this_sd)
+                goto out;

-                /*
-                 * Start passive balancing when half the imbalance_pct
-                 * limit is reached.
-                 */
-                if (this_sd->flags & SD_WAKE_BALANCE) {
-                        if (imbalance*this_load <= 100*load) {
-                                schedstat_inc(this_sd, ttwu_move_balance);
-                                schedstat_inc(p, se.nr_wakeups_passive);
-                                goto out_set_cpu;
-                        }
+        idx = this_sd->wake_idx;
+
+        imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+
+        load = source_load(prev_cpu, idx);
+        this_load = target_load(this_cpu, idx);
+
+        if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+                        load, this_load, imbalance))
+                return this_cpu;
+
+        if (prev_cpu == this_cpu)
+                goto out;
+
+        /*
+         * Start passive balancing when half the imbalance_pct
+         * limit is reached.
+         */
+        if (this_sd->flags & SD_WAKE_BALANCE) {
+                if (imbalance*this_load <= 100*load) {
+                        schedstat_inc(this_sd, ttwu_move_balance);
+                        schedstat_inc(p, se.nr_wakeups_passive);
+                        return this_cpu;
                 }
         }

-        new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
+out:
         return wake_idle(new_cpu, p);
 }
 #endif /* CONFIG_SMP */
@@ -1092,6 +1133,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
                 return;
         }

+        se->last_wakeup = se->sum_exec_runtime;
+        if (unlikely(se == pse))
+                return;
+
         cfs_rq_of(pse)->next = pse;

         /*
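wake_affine() above short-circuits to an affine wakeup when the wakeup is synchronous and both the waker's and the wakee's avg_overlap are below sysctl_sched_migration_cost, i.e. both tasks tend to run only briefly after waking each other. A reduced sketch of just that decision, with invented values (the real function also weighs CPU load, as in the hunk above):

/* wake_affine_demo.c - the avg_overlap shortcut taken on sync wakeups */
#include <stdio.h>
#include <stdint.h>

#define SCHED_MIGRATION_COST 500000ULL          /* ns, the default from sched_fair.c */

static int sync_wakeup_is_affine(uint64_t curr_avg_overlap,
                                 uint64_t wakee_avg_overlap, int sync)
{
        /* Both tasks sleep again soon after waking the other: keep them together. */
        return sync &&
               curr_avg_overlap < SCHED_MIGRATION_COST &&
               wakee_avg_overlap < SCHED_MIGRATION_COST;
}

int main(void)
{
        /* invented avg_overlap values in nanoseconds */
        printf("pipe-like pair: %d\n", sync_wakeup_is_affine(120000, 90000, 1));
        printf("cpu-hog waker:  %d\n", sync_wakeup_is_affine(9000000, 90000, 1));
        printf("async wakeup:   %d\n", sync_wakeup_is_affine(120000, 90000, 0));
        return 0;
}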
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 548c436a776b..278534bbca95 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -141,13 +141,8 @@ static void clocksource_watchdog(unsigned long data)
         }

         if (!list_empty(&watchdog_list)) {
-                /* Cycle through CPUs to check if the CPUs stay
-                 * synchronized to each other. */
-                int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
-                if (next_cpu >= NR_CPUS)
-                        next_cpu = first_cpu(cpu_online_map);
-                watchdog_timer.expires += WATCHDOG_INTERVAL;
-                add_timer_on(&watchdog_timer, next_cpu);
+                __mod_timer(&watchdog_timer,
+                            watchdog_timer.expires + WATCHDOG_INTERVAL);
         }
         spin_unlock(&watchdog_lock);
 }
@@ -169,7 +164,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
                 if (!started && watchdog) {
                         watchdog_last = watchdog->read();
                         watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
-                        add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
+                        add_timer(&watchdog_timer);
                 }
         } else {
                 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -190,8 +185,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
                                 watchdog_last = watchdog->read();
                                 watchdog_timer.expires =
                                         jiffies + WATCHDOG_INTERVAL;
-                                add_timer_on(&watchdog_timer,
-                                                first_cpu(cpu_online_map));
+                                add_timer(&watchdog_timer);
                         }
                 }
         }