author     Thomas Gleixner <tglx@linutronix.de>   2016-09-01 12:33:46 -0400
committer  Thomas Gleixner <tglx@linutronix.de>   2016-09-01 12:33:46 -0400
commit     0cb7bf61b1e9f05027de58c80f9b46a714d24e35 (patch)
tree       41fb55cf62d07b425122f9a8b96412c0d8eb99c5 /kernel/sched
parent     aa877175e7a9982233ed8f10cb4bfddd78d82741 (diff)
parent     3eab887a55424fc2c27553b7bfe32330df83f7b8 (diff)
Merge branch 'linus' into smp/hotplug
Apply upstream changes to avoid conflicts with pending patches.
Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/core.c          19
-rw-r--r--   kernel/sched/cpudeadline.c    2
-rw-r--r--   kernel/sched/cputime.c       41
-rw-r--r--   kernel/sched/deadline.c       5
-rw-r--r--   kernel/sched/fair.c           2
5 files changed, 58 insertions, 11 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe8e440..2a906f20fba7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 #include <linux/frame.h>
+#include <linux/prefetch.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -2972,6 +2973,23 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
 /*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+	struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+	prefetch(curr);
+	prefetch(&curr->exec_start);
+}
+
+/*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
  * pending runtime that have not been accounted yet.
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	 * thread, breaking clock_gettime().
 	 */
 	if (task_current(rq, p) && task_on_rq_queued(p)) {
+		prefetch_curr_exec_start(p);
 		update_rq_clock(rq);
 		p->sched_class->update_curr(rq);
 	}
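
For readers unfamiliar with the pattern used by the new prefetch_curr_exec_start() helper, here is a minimal, self-contained userspace sketch of the same idea: issue prefetch hints for data behind a pointer chase before it is dereferenced, so the cache miss overlaps with other work. The toy_rq/toy_entity types and toy_prefetch_curr_exec_start() are made up for illustration, and __builtin_prefetch stands in for the kernel's prefetch() macro (which typically expands to an equivalent arch-specific hint).

/* Toy illustration only, not kernel code. */
#include <stdio.h>

struct toy_entity {
	char pad[256];			/* stand-in for the fields before exec_start */
	unsigned long long exec_start;
};

struct toy_rq {
	struct toy_entity *curr;
};

static inline void toy_prefetch_curr_exec_start(struct toy_rq *rq)
{
	struct toy_entity *curr = rq->curr;

	__builtin_prefetch(curr);		/* warm the entity itself   */
	__builtin_prefetch(&curr->exec_start);	/* and the field read later */
}

int main(void)
{
	struct toy_entity se = { .exec_start = 123456789ULL };
	struct toy_rq rq = { .curr = &se };

	toy_prefetch_curr_exec_start(&rq);
	/* ... other work would go here; the later load is then likely warm ... */
	printf("exec_start = %llu\n", rq.curr->exec_start);
	return 0;
}
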
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be58820465c..d4184498c9f5 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
 
 	if (old_idx == IDX_INVALID) {
 		cp->size++;
-		cp->elements[cp->size - 1].dl = 0;
+		cp->elements[cp->size - 1].dl = dl;
 		cp->elements[cp->size - 1].cpu = cpu;
 		cp->elements[cpu].idx = cp->size - 1;
 		cpudl_change_key(cp, cp->size - 1, dl);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f658c036..a846cf89eb96 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
 		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 	 * idle, or potentially user or system time. Due to rounding,
 	 * other time can exceed ticks occasionally.
 	 */
-	other = account_other_time(cputime);
+	other = account_other_time(ULONG_MAX);
 	if (other >= cputime)
 		return;
 	cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	}
 
 	cputime = cputime_one_jiffy;
-	steal = steal_account_process_time(cputime);
+	steal = steal_account_process_time(ULONG_MAX);
 
 	if (steal >= cputime)
 		return;
@@ -508,13 +513,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
  */
 void account_idle_ticks(unsigned long ticks)
 {
+	cputime_t cputime, steal;
 
 	if (sched_clock_irqtime) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
 
-	account_idle_time(jiffies_to_cputime(ticks));
+	cputime = jiffies_to_cputime(ticks);
+	steal = steal_account_process_time(ULONG_MAX);
+
+	if (steal >= cputime)
+		return;
+
+	cputime -= steal;
+	account_idle_time(cputime);
 }
 
 /*
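
The three cputime.c hunks above share one shape: read the pending steal/irq ("other") time with no cap (ULONG_MAX), then bound it against the amount the caller is about to account, so the remainder can never go negative even when, as the comment added to steal_account_process_time() explains, a paused guest reports more steal time than the caller thinks elapsed. A hedged, self-contained userspace sketch of that shape follows; the toy_* names are made up and this is not the kernel's implementation.

#include <stdio.h>
#include <limits.h>

typedef unsigned long cputime_t;

/* Stand-in for steal_account_process_time(): hand back however much
 * steal time has piled up, clamped only by the caller-supplied cap. */
static cputime_t toy_steal_time(cputime_t accumulated, cputime_t maxtime)
{
	return accumulated < maxtime ? accumulated : maxtime;
}

/* Mirrors the flow of the new account_idle_ticks() above. */
static void toy_account_idle(cputime_t ticks_worth, cputime_t pending_steal)
{
	cputime_t cputime = ticks_worth;
	cputime_t steal = toy_steal_time(pending_steal, ULONG_MAX);

	if (steal >= cputime) {
		printf("steal %lu swallows the whole %lu, no idle accounted\n",
		       steal, cputime);
		return;
	}

	cputime -= steal;
	printf("steal=%lu idle=%lu\n", steal, cputime);
}

int main(void)
{
	toy_account_idle(4, 1);		/* normal: some steal, rest is idle     */
	toy_account_idle(4, 10);	/* paused guest: steal exceeds the ticks */
	return 0;
}
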
@@ -606,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
 	stime = curr->stime;
 	utime = curr->utime;
 
-	if (utime == 0) {
-		stime = rtime;
+	/*
+	 * If either stime or both stime and utime are 0, assume all runtime is
+	 * userspace. Once a task gets some ticks, the monotonicity code at
+	 * 'update' will ensure things converge to the observed ratio.
+	 */
+	if (stime == 0) {
+		utime = rtime;
 		goto update;
 	}
 
-	if (stime == 0) {
-		utime = rtime;
+	if (utime == 0) {
+		stime = rtime;
 		goto update;
 	}
 
 	stime = scale_stime((__force u64)stime, (__force u64)rtime,
 			    (__force u64)(stime + utime));
 
+update:
 	/*
 	 * Make sure stime doesn't go backwards; this preserves monotonicity
 	 * for utime because rtime is monotonic.
@@ -641,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
 		stime = rtime - utime;
 	}
 
-update:
 	prev->stime = stime;
 	prev->utime = utime;
 out:
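
The reordered checks and the moved 'update:' label above implement a split-then-clamp scheme: the precise rtime is divided between stime and utime in the sampled ratio, and the result is clamped so neither reported value ever goes backwards across calls. Below is a hedged sketch of that logic with plain integer arithmetic standing in for scale_stime() and cputime_t; toy names, simplified overflow handling, not the kernel code.

#include <stdio.h>
#include <stdint.h>

/* prev_stime/prev_utime model the prev->stime/prev->utime fields. */
static void toy_cputime_adjust(uint64_t rtime, uint64_t stime, uint64_t utime,
			       uint64_t *prev_stime, uint64_t *prev_utime)
{
	if (stime == 0) {		/* no system ticks sampled: call it all user */
		utime = rtime;
		goto update;
	}
	if (utime == 0) {		/* no user ticks sampled: call it all system */
		stime = rtime;
		goto update;
	}

	/* Distribute the precise runtime in the sampled stime:utime ratio
	 * (the kernel uses scale_stime() to avoid 64-bit overflow here). */
	stime = stime * rtime / (stime + utime);

update:
	/* Never let reported stime, and therefore utime, go backwards. */
	if (stime < *prev_stime)
		stime = *prev_stime;
	utime = rtime - stime;
	if (utime < *prev_utime) {
		utime = *prev_utime;
		stime = rtime - utime;
	}
	*prev_stime = stime;
	*prev_utime = utime;
}

int main(void)
{
	uint64_t ps = 0, pu = 0;

	toy_cputime_adjust(10, 1, 3, &ps, &pu);	/* 10 units split 1:3 */
	printf("stime=%llu utime=%llu\n",
	       (unsigned long long)ps, (unsigned long long)pu);

	toy_cputime_adjust(20, 1, 9, &ps, &pu);	/* more rtime, ratio now 1:9 */
	printf("stime=%llu utime=%llu\n",
	       (unsigned long long)ps, (unsigned long long)pu);
	return 0;
}
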
@@ -686,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
 	unsigned long now = READ_ONCE(jiffies);
 	cputime_t delta, other;
 
+	/*
+	 * Unlike tick based timing, vtime based timing never has lost
+	 * ticks, and no need for steal time accounting to make up for
+	 * lost ticks. Vtime accounts a rounded version of actual
+	 * elapsed time. Limit account_other_time to prevent rounding
+	 * errors from causing elapsed vtime to go negative.
+	 */
 	delta = jiffies_to_cputime(now - tsk->vtime_snap);
 	other = account_other_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
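
The cap passed to account_other_time() here is the whole point of the comment above: get_vtime_delta() goes on (in code not shown in this hunk) to return the elapsed delta minus the "other" time in unsigned arithmetic, so "other" must never exceed "delta" or the result would wrap to a huge value instead of going negative. A tiny hedged sketch of that hazard, using toy helpers rather than the kernel code:

#include <stdio.h>

typedef unsigned long cputime_t;

/* Stand-in for account_other_time(): never returns more than the cap. */
static cputime_t toy_other_time(cputime_t accumulated, cputime_t max)
{
	return accumulated < max ? accumulated : max;
}

static cputime_t toy_get_vtime_delta(cputime_t delta, cputime_t pending_other)
{
	/* Capping at delta (rather than ULONG_MAX) keeps other <= delta. */
	cputime_t other = toy_other_time(pending_other, delta);

	return delta - other;		/* unsigned, so it must not wrap */
}

int main(void)
{
	printf("%lu\n", toy_get_vtime_delta(5, 3));	/* 2 */
	printf("%lu\n", toy_get_vtime_delta(5, 7));	/* capped: 0, not a huge wrap */
	return 0;
}
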
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f0217ff4..1ce8867283dc 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 *
 	 * XXX figure out if select_task_rq_dl() deals with offline cpus.
 	 */
-	if (unlikely(!rq->online))
+	if (unlikely(!rq->online)) {
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		rq = dl_task_offline_migration(rq, p);
+		rf.cookie = lockdep_pin_lock(&rq->lock);
+	}
 
 	/*
 	 * Queueing this task back might have overloaded rq, check if we need
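
The unpin/re-pin pair above follows from dl_task_offline_migration() being allowed to drop and re-acquire rq->lock and to hand back a different rq, which lockdep's pin cookie exists to flag; the caller therefore releases its pin before the call and pins the lock of whatever rq comes back. A hedged userspace toy of that discipline, with made-up lock and cookie types (nothing here is lockdep itself):

#include <assert.h>
#include <stdio.h>

struct toy_lock {
	int held;
	int pinned;			/* a pin cookie is outstanding */
};

struct toy_cookie { int valid; };

static struct toy_cookie toy_pin(struct toy_lock *l)
{
	assert(l->held);
	l->pinned = 1;
	return (struct toy_cookie){ .valid = 1 };
}

static void toy_unpin(struct toy_lock *l, struct toy_cookie c)
{
	assert(c.valid && l->pinned);
	l->pinned = 0;
}

/* Models a helper that drops the lock it was given, takes another one,
 * and returns it -- as dl_task_offline_migration() may return a new rq. */
static struct toy_lock *toy_offline_migration(struct toy_lock *l, struct toy_lock *other)
{
	assert(!l->pinned);		/* dropping a pinned lock is the bug being avoided */
	l->held = 0;
	other->held = 1;
	return other;
}

int main(void)
{
	struct toy_lock a = { .held = 1 }, b = { 0 };
	struct toy_lock *lock = &a;
	struct toy_cookie cookie = toy_pin(lock);

	/* The fixed pattern: unpin, call, re-pin on the lock handed back. */
	toy_unpin(lock, cookie);
	lock = toy_offline_migration(lock, &b);
	cookie = toy_pin(lock);

	toy_unpin(lock, cookie);
	printf("pin discipline held\n");
	return 0;
}
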
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eedea763..039de34f1521 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
 	pcfs_rq = tg->parent->cfs_rq[cpu];
 
 	cfs_rq->throttle_count = pcfs_rq->throttle_count;
-	pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+	cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */