Diffstat (limited to 'kernel')
-rw-r--r--  kernel/relay.c             |   5
-rw-r--r--  kernel/sched.c             |  11
-rw-r--r--  kernel/sched_debug.c       |   1
-rw-r--r--  kernel/sched_fair.c        | 191
-rw-r--r--  kernel/time/clocksource.c  |  14
5 files changed, 136 insertions, 86 deletions
diff --git a/kernel/relay.c b/kernel/relay.c
index d080b9d161a7..4c035a8a248c 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1066,7 +1066,7 @@ static int subbuf_splice_actor(struct file *in,
 				  unsigned int flags,
 				  int *nonpad_ret)
 {
-	unsigned int pidx, poff, total_len, subbuf_pages, ret;
+	unsigned int pidx, poff, total_len, subbuf_pages, nr_pages, ret;
 	struct rchan_buf *rbuf = in->private_data;
 	unsigned int subbuf_size = rbuf->chan->subbuf_size;
 	uint64_t pos = (uint64_t) *ppos;
@@ -1097,8 +1097,9 @@ static int subbuf_splice_actor(struct file *in,
 	subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
 	pidx = (read_start / PAGE_SIZE) % subbuf_pages;
 	poff = read_start & ~PAGE_MASK;
+	nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS);
 
-	for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) {
+	for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
 		unsigned int this_len, this_end, private;
 		unsigned int cur_pos = read_start + total_len;
 
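
The clamp added above exists because subbuf_splice_actor() fills a fixed-size array of page descriptors whose capacity is PIPE_BUFFERS, while a sub-buffer can span more pages than that. A minimal userspace sketch of the same guard, with a hypothetical MAX_DESCS standing in for PIPE_BUFFERS and a made-up page count:

#include <stdio.h>

#define MAX_DESCS 16	/* hypothetical stand-in for PIPE_BUFFERS */

#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	unsigned int descs[MAX_DESCS];
	unsigned int subbuf_pages = 64;	/* made-up: pages in one sub-buffer */
	unsigned int nr_pages, i;

	/*
	 * Bounding the loop by subbuf_pages alone would write past
	 * descs[MAX_DESCS - 1]; clamp to the array capacity first.
	 */
	nr_pages = min_t(unsigned int, subbuf_pages, MAX_DESCS);

	for (i = 0; i < nr_pages; i++)
		descs[i] = i;

	printf("filled %u of %u sub-buffer pages\n", nr_pages, subbuf_pages);
	return 0;
}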
diff --git a/kernel/sched.c b/kernel/sched.c
index d1ad69b270ca..3f7c5eb254e2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1396,6 +1396,12 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 {
 	s64 delta;
 
+	/*
+	 * Buddy candidates are cache hot:
+	 */
+	if (&p->se == cfs_rq_of(&p->se)->next)
+		return 1;
+
 	if (p->sched_class != &fair_sched_class)
 		return 0;
 
@@ -1855,10 +1861,11 @@ out_activate:
 		schedstat_inc(p, se.nr_wakeups_remote);
 	update_rq_clock(rq);
 	activate_task(rq, p, 1);
-	check_preempt_curr(rq, p);
 	success = 1;
 
 out_running:
+	check_preempt_curr(rq, p);
+
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -1892,6 +1899,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
+	p->se.last_wakeup = 0;
+	p->se.avg_overlap = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4b5e24cf2f4a..ef358ba07683 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -288,6 +288,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
+	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc59080efa..b85cac4b5e25 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
@@ -556,6 +556,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	account_entity_enqueue(cfs_rq, se);
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (!se->last_wakeup)
+		return;
+
+	update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+	se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -566,6 +581,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -980,96 +996,121 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 #endif
 
 #ifdef CONFIG_SMP
-static int select_task_rq_fair(struct task_struct *p, int sync)
+
+static const struct sched_class fair_sched_class;
+
+static int
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+	    int idx, unsigned long load, unsigned long this_load,
+	    unsigned int imbalance)
 {
-	int cpu, this_cpu;
-	struct rq *rq;
-	struct sched_domain *sd, *this_sd = NULL;
-	int new_cpu;
+	struct task_struct *curr = this_rq->curr;
+	unsigned long tl = this_load;
+	unsigned long tl_per_task;
+
+	if (!(this_sd->flags & SD_WAKE_AFFINE))
+		return 0;
+
+	/*
+	 * If the currently running task will sleep within
+	 * a reasonable amount of time then attract this newly
+	 * woken task:
+	 */
+	if (sync && curr->sched_class == &fair_sched_class) {
+		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		    p->se.avg_overlap < sysctl_sched_migration_cost)
+			return 1;
+	}
 
-	cpu = task_cpu(p);
-	rq = task_rq(p);
-	this_cpu = smp_processor_id();
-	new_cpu = cpu;
+	schedstat_inc(p, se.nr_wakeups_affine_attempts);
+	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (cpu == this_cpu)
-		goto out_set_cpu;
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync)
+		tl -= current->se.load.weight;
+
+	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
+			100*(tl + p->se.load.weight) <= imbalance*load) {
+		/*
+		 * This domain has SD_WAKE_AFFINE and
+		 * p is cache cold in this domain, and
+		 * there is no bad imbalance.
+		 */
+		schedstat_inc(this_sd, ttwu_move_affine);
+		schedstat_inc(p, se.nr_wakeups_affine);
 
+		return 1;
+	}
+	return 0;
+}
+
+static int select_task_rq_fair(struct task_struct *p, int sync)
+{
+	struct sched_domain *sd, *this_sd = NULL;
+	int prev_cpu, this_cpu, new_cpu;
+	unsigned long load, this_load;
+	struct rq *rq, *this_rq;
+	unsigned int imbalance;
+	int idx;
+
+	prev_cpu = task_cpu(p);
+	rq = task_rq(p);
+	this_cpu = smp_processor_id();
+	this_rq = cpu_rq(this_cpu);
+	new_cpu = prev_cpu;
+
+	/*
+	 * 'this_sd' is the first domain that both
+	 * this_cpu and prev_cpu are present in:
+	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(cpu, sd->span)) {
+		if (cpu_isset(prev_cpu, sd->span)) {
 			this_sd = sd;
 			break;
 		}
 	}
 
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
-		goto out_set_cpu;
+		goto out;
 
 	/*
 	 * Check for affine wakeup and passive balancing possibilities.
 	 */
-	if (this_sd) {
-		int idx = this_sd->wake_idx;
-		unsigned int imbalance;
-		unsigned long load, this_load;
-
-		imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-		load = source_load(cpu, idx);
-		this_load = target_load(this_cpu, idx);
-
-		new_cpu = this_cpu; /* Wake to this CPU if we can */
-
-		if (this_sd->flags & SD_WAKE_AFFINE) {
-			unsigned long tl = this_load;
-			unsigned long tl_per_task;
-
-			/*
-			 * Attract cache-cold tasks on sync wakeups:
-			 */
-			if (sync && !task_hot(p, rq->clock, this_sd))
-				goto out_set_cpu;
-
-			schedstat_inc(p, se.nr_wakeups_affine_attempts);
-			tl_per_task = cpu_avg_load_per_task(this_cpu);
-
-			/*
-			 * If sync wakeup then subtract the (maximum possible)
-			 * effect of the currently running task from the load
-			 * of the current CPU:
-			 */
-			if (sync)
-				tl -= current->se.load.weight;
-
-			if ((tl <= load &&
-				tl + target_load(cpu, idx) <= tl_per_task) ||
-				100*(tl + p->se.load.weight) <= imbalance*load) {
-				/*
-				 * This domain has SD_WAKE_AFFINE and
-				 * p is cache cold in this domain, and
-				 * there is no bad imbalance.
-				 */
-				schedstat_inc(this_sd, ttwu_move_affine);
-				schedstat_inc(p, se.nr_wakeups_affine);
-				goto out_set_cpu;
-			}
-		}
+	if (!this_sd)
+		goto out;
 
-		/*
-		 * Start passive balancing when half the imbalance_pct
-		 * limit is reached.
-		 */
-		if (this_sd->flags & SD_WAKE_BALANCE) {
-			if (imbalance*this_load <= 100*load) {
-				schedstat_inc(this_sd, ttwu_move_balance);
-				schedstat_inc(p, se.nr_wakeups_passive);
-				goto out_set_cpu;
-			}
+	idx = this_sd->wake_idx;
+
+	imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+
+	load = source_load(prev_cpu, idx);
+	this_load = target_load(this_cpu, idx);
+
+	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+			load, this_load, imbalance))
+		return this_cpu;
+
+	if (prev_cpu == this_cpu)
+		goto out;
+
+	/*
+	 * Start passive balancing when half the imbalance_pct
+	 * limit is reached.
+	 */
+	if (this_sd->flags & SD_WAKE_BALANCE) {
+		if (imbalance*this_load <= 100*load) {
+			schedstat_inc(this_sd, ttwu_move_balance);
+			schedstat_inc(p, se.nr_wakeups_passive);
+			return this_cpu;
 		}
 	}
 
-	new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
+out:
 	return wake_idle(new_cpu, p);
 }
 #endif /* CONFIG_SMP */
@@ -1092,6 +1133,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
+	se->last_wakeup = se->sum_exec_runtime;
+	if (unlikely(se == pse))
+		return;
+
 	cfs_rq_of(pse)->next = pse;
 
 	/*
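
The avg_overlap bookkeeping added above is a shift-based running average: each sample pulls the stored average one eighth of the way toward itself, and wake_affine() then compares that average against sysctl_sched_migration_cost to decide whether a sync wakeup should pull the woken task onto the waker's CPU. A standalone sketch of that update rule, with made-up overlap samples:

#include <stdio.h>
#include <stdint.h>

/*
 * Same update rule as update_avg() above: avg += (sample - avg) / 8,
 * using a signed difference and an arithmetic right shift, as the
 * kernel's s64 >> 3 does on the compilers it supports.
 */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)sample - (int64_t)*avg;

	*avg += diff >> 3;
}

int main(void)
{
	uint64_t avg = 0;
	/* made-up wakeup/sleep overlap samples, in nanoseconds */
	uint64_t samples[] = { 800000, 400000, 600000, 200000 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg, samples[i]);
		printf("sample %llu ns -> avg %llu ns\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg);
	}
	return 0;
}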
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 548c436a776b..278534bbca95 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -141,13 +141,8 @@ static void clocksource_watchdog(unsigned long data)
 	}
 
 	if (!list_empty(&watchdog_list)) {
-		/* Cycle through CPUs to check if the CPUs stay synchronized to
-		 * each other. */
-		int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
-		if (next_cpu >= NR_CPUS)
-			next_cpu = first_cpu(cpu_online_map);
-		watchdog_timer.expires += WATCHDOG_INTERVAL;
-		add_timer_on(&watchdog_timer, next_cpu);
+		__mod_timer(&watchdog_timer,
+			    watchdog_timer.expires + WATCHDOG_INTERVAL);
 	}
 	spin_unlock(&watchdog_lock);
 }
@@ -169,7 +164,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 		if (!started && watchdog) {
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
-			add_timer_on(&watchdog_timer, first_cpu(cpu_online_map));
+			add_timer(&watchdog_timer);
 		}
 	} else {
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -190,8 +185,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires =
 				jiffies + WATCHDOG_INTERVAL;
-			add_timer_on(&watchdog_timer,
-				     first_cpu(cpu_online_map));
+			add_timer(&watchdog_timer);
 		}
 	}
 }
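
A side note on the re-arm above: __mod_timer() advances the watchdog from its previous expiry (watchdog_timer.expires + WATCHDOG_INTERVAL) rather than from the current time, so the period does not drift even when the handler runs late. A userspace sketch of the same absolute-deadline pattern using POSIX clock_nanosleep(); the 0.5 s interval is an arbitrary stand-in for WATCHDOG_INTERVAL:

#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <time.h>

#define INTERVAL_NS 500000000L	/* arbitrary 0.5 s period */

int main(void)
{
	struct timespec next;
	int i;

	clock_gettime(CLOCK_MONOTONIC, &next);

	for (i = 0; i < 5; i++) {
		/* advance the previous deadline, don't re-read "now" */
		next.tv_nsec += INTERVAL_NS;
		if (next.tv_nsec >= 1000000000L) {
			next.tv_nsec -= 1000000000L;
			next.tv_sec++;
		}
		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &next, NULL);
		printf("tick %d\n", i);
	}
	return 0;
}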