Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c                      |   6
-rw-r--r--  kernel/context_tracking.c          |  43
-rw-r--r--  kernel/cpu.c                       |   4
-rw-r--r--  kernel/delayacct.c                 |   7
-rw-r--r--  kernel/exit.c                      |  10
-rw-r--r--  kernel/fork.c                      |   6
-rw-r--r--  kernel/futex.c                     |   1
-rw-r--r--  kernel/hrtimer.c                   |   2
-rw-r--r--  kernel/irq/manage.c                |   1
-rw-r--r--  kernel/mutex.c                     |   1
-rw-r--r--  kernel/posix-cpu-timers.c          |  28
-rw-r--r--  kernel/rtmutex-debug.c             |   1
-rw-r--r--  kernel/rtmutex-tester.c            |   1
-rw-r--r--  kernel/rtmutex.c                   |   1
-rw-r--r--  kernel/sched/core.c                |  22
-rw-r--r--  kernel/sched/cpupri.c              |   2
-rw-r--r--  kernel/sched/cputime.c             | 314
-rw-r--r--  kernel/sched/fair.c                |  27
-rw-r--r--  kernel/sched/rt.c                  |  26
-rw-r--r--  kernel/sched/sched.h               |   2
-rw-r--r--  kernel/signal.c                    |  12
-rw-r--r--  kernel/softirq.c                   |   6
-rw-r--r--  kernel/sysctl.c                    |   8
-rw-r--r--  kernel/time/tick-sched.c           |   5
-rw-r--r--  kernel/timer.c                     |   1
-rw-r--r--  kernel/trace/trace.c               |   1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  |   2
-rw-r--r--  kernel/tsacct.c                    |  44
-rw-r--r--  kernel/watchdog.c                  |   1
29 files changed, 462 insertions(+), 123 deletions(-)
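Editor's note: the recurring change across these files is that direct reads of task_struct::utime/stime (and their scaled variants) are replaced by the task_cputime() and task_cputime_scaled() accessors, which also fold in any not-yet-flushed nohz/vtime delta when CONFIG_VIRT_CPU_ACCOUNTING_GEN is active. A minimal sketch of the caller-side pattern, not part of the patch (thread_cpu_usecs() is a made-up helper for illustration):

/* Illustrative only: sum a task's user + system time in microseconds. */
#include <linux/sched.h>

static u64 thread_cpu_usecs(struct task_struct *p)
{
	cputime_t utime, stime;

	/* Adds the pending vtime delta on top of the stored fields. */
	task_cputime(p, &utime, &stime);
	return cputime_to_usecs(utime + stime);
}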
diff --git a/kernel/acct.c b/kernel/acct.c
index 051e071a06e7..e8b1627ab9c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -566,6 +566,7 @@ out:
 void acct_collect(long exitcode, int group_dead)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
+	cputime_t utime, stime;
 	unsigned long vsize = 0;
 
 	if (group_dead && current->mm) {
@@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime += current->utime;
-	pacct->ac_stime += current->stime;
+	task_cputime(current, &utime, &stime);
+	pacct->ac_utime += utime;
+	pacct->ac_stime += stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index d566aba7e801..65349f07b878 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -15,26 +15,13 @@
  */
 
 #include <linux/context_tracking.h>
+#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/export.h>
 
-struct context_tracking {
-	/*
-	 * When active is false, probes are unset in order
-	 * to minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
-
-static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
@@ -70,7 +57,6 @@ void user_enter(void)
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_USER);
 		/*
 		 * At this stage, only low level arch entry code remains and
 		 * then we'll run in userspace. We can assume there won't be
@@ -78,7 +64,9 @@ void user_enter(void)
 		 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
 		 * on the tick.
 		 */
+		vtime_user_enter(current);
 		rcu_user_enter();
+		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
 }
@@ -104,16 +92,35 @@ void user_exit(void)
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
 		/*
 		 * We are going to run code that may use RCU. Inform
 		 * RCU core about that (ie: we may need the tick again).
 		 */
 		rcu_user_exit();
+		vtime_user_exit(current);
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }
 
+void guest_enter(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_enter(current);
+	else
+		__guest_enter();
+}
+EXPORT_SYMBOL_GPL(guest_enter);
+
+void guest_exit(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_exit(current);
+	else
+		__guest_exit();
+}
+EXPORT_SYMBOL_GPL(guest_exit);
+
 
 /**
  * context_tracking_task_switch - context switch the syscall callbacks
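Editor's note: the new guest_enter()/guest_exit() pair above is meant to bracket guest execution: with generic vtime accounting enabled it flushes system time via vtime_guest_enter()/vtime_guest_exit(), otherwise it falls back to the older __guest_enter()/__guest_exit() PF_VCPU toggling. A hedged sketch of how a hypervisor loop would use it, illustrative only and not the actual KVM call sites:

	guest_enter();   /* flush host time up to here, mark the task as running a vcpu */
	/* ... enter and run the guest vcpu ... */
	guest_exit();    /* flush guest time, drop the vcpu marking */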
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3046a503242c..e5d5e8e1e030 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
+	cputime_t utime, stime;
 
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
+		task_cputime(p, &utime, &stime);
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (p->utime || p->stime))
+		    (utime || stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 418b3f7053aa..d473988c1d0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	unsigned long long t2, t3;
 	unsigned long flags;
 	struct timespec ts;
+	cputime_t utime, stime, stimescaled, utimescaled;
 
 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
@@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		goto done;
 
 	tmp = (s64)d->cpu_run_real_total;
-	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+	task_cputime(tsk, &utime, &stime);
+	cputime_to_timespec(utime + stime, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
 
 	tmp = (s64)d->cpu_scaled_run_real_total;
-	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	cputime_to_timespec(utimescaled + stimescaled, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_scaled_run_real_total =
 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
diff --git a/kernel/exit.c b/kernel/exit.c
index b4df21937216..7dd20408707c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *uninitialized_var(tty);
+	cputime_t utime, stime;
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime += tsk->utime;
-		sig->stime += tsk->stime;
-		sig->gtime += tsk->gtime;
+		task_cputime(tsk, &utime, &stime);
+		sig->utime += utime;
+		sig->stime += stime;
+		sig->gtime += task_gtime(tsk);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		sig = p->signal;
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
-		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
diff --git a/kernel/fork.c b/kernel/fork.c
index c535f33bbb9c..4133876d8cd2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_init(&p->vtime_seqlock);
+	p->vtime_snap = 0;
+	p->vtime_snap_whence = VTIME_SLEEPING;
+#endif
+
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 19eb089ca003..9618b6e9fb36 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/ptrace.h>
+#include <linux/sched/rt.h>
 
 #include <asm/futex.h>
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6db7a5ed52b5..c5dde988c0ce 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -44,6 +44,8 @@
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 88e7bed62711..fa17855ca65a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/task_work.h>
 
 #include "internals.h"
diff --git a/kernel/mutex.c b/kernel/mutex.c
index a307cc9c9526..52f23011b6e0 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -19,6 +19,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index a278cad1d5d6..165d47698477 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return p->utime + p->stime;
+	cputime_t utime, stime;
+
+	task_cputime(p, &utime, &stime);
+
+	return utime + stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	return p->utime;
+	cputime_t utime;
+
+	task_cputime(p, &utime, NULL);
+
+	return utime;
 }
 
 static int
@@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	cputime_t utime, stime;
+
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+		       utime, stime, tsk->se.sum_exec_runtime);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
+	cputime_t utime, stime;
 
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       tsk->utime + sig->utime, tsk->stime + sig->stime,
+		       utime + sig->utime, stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = tsk->utime,
-			.stime = tsk->stime,
+			.utime = utime,
+			.stime = stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 16502d3a71c8..13b243a323fa 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -17,6 +17,7 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 98ec49475460..7890b10084a7 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a242e691c993..1e09308bf2a1 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include "rtmutex_common.h"
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0bebba..4a88f1d51563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
@@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 293b202fcf79..9857329ed280 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;
 
 	times->utime = sig->utime;
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(tsk, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
 }
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)
 
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Use precise platform statistics if available:
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }
 
-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
 		vtime_account_system(prev);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!vtime_accounting_enabled())
+		return;
+
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because the context tracking don't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
-#else
-
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
-#endif
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
-static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
 {
 	u64 temp = (__force u64) rtime;
 
-	temp *= (__force u64) utime;
+	temp *= (__force u64) stime;
 
 	if (sizeof(cputime_t) == 4)
 		temp = div_u64(temp, (__force u32) total);
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
 			   struct cputime *prev,
 			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime, total;
+	cputime_t rtime, stime, total;
 
-	utime = curr->utime;
-	total = utime + curr->stime;
+	stime = curr->stime;
+	total = stime + curr->utime;
 
 	/*
 	 * Tick based cputime accounting depend on random scheduling
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
 	if (total)
-		utime = scale_utime(utime, rtime, total);
+		stime = scale_stime(stime, rtime, total);
 	else
-		utime = rtime;
+		stime = rtime;
 
 	/*
 	 * If the tick based count grows faster than the scheduler one,
 	 * the result of the scaling may go backward.
 	 * Let's enforce monotonicity.
 	 */
-	prev->utime = max(prev->utime, utime);
-	prev->stime = max(prev->stime, rtime - prev->utime);
+	prev->stime = max(prev->stime, stime);
+	prev->utime = max(prev->utime, rtime - prev->stime);
 
 	*ut = prev->utime;
 	*st = prev->stime;
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
+
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long delta = vtime_delta(tsk);
+
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta(tsk);
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_idle_time(delta_cpu);
+}
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+}
+
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
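Editor's note: the cputime_adjust() hunks above change which component is scaled against the scheduler clock: stime now keeps its tick-based share of rtime and utime becomes the remainder, with max() enforcing monotonicity against the previous snapshot. A hedged worked example with invented numbers (plain integers standing in for cputime_t):

/* Illustrative only: suppose the tick counters read utime = 30, stime = 10
 * (total = 40), but sum_exec_runtime converts to rtime = 80.  Then:
 *
 *   stime = rtime * stime / total = 80 * 10 / 40 = 20
 *   prev->stime = max(prev->stime, 20);
 *   prev->utime = max(prev->utime, rtime - prev->stime);   -- 80 - 20 = 60
 *
 * so the reported utime + stime tracks rtime and neither value ever goes
 * backward between two readings. */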
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81fa53643409..7a33e5986fc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	}
 
 	/* ensure we never gain time by being placed backwards. */
-	vruntime = max_vruntime(se->vruntime, vruntime);
-
-	se->vruntime = vruntime;
+	se->vruntime = max_vruntime(se->vruntime, vruntime);
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
-	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
 	struct sched_group *sg;
-	int i;
+	int i = task_cpu(p);
 
-	/*
-	 * If the task is going to be woken-up on this cpu and if it is
-	 * already idle, then it is the right target.
-	 */
-	if (target == cpu && idle_cpu(cpu))
-		return cpu;
+	if (idle_cpu(target))
+		return target;
 
 	/*
-	 * If the task is going to be woken-up on the cpu where it previously
-	 * ran and if it is currently idle, then it the right target.
+	 * If the prevous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (target == prev_cpu && idle_cpu(prev_cpu))
-		return prev_cpu;
+	if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+		return i;
 
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
@@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 				goto next;
 
 			for_each_cpu(i, sched_group_cpus(sg)) {
-				if (!idle_cpu(i))
+				if (i == target || !idle_cpu(i))
 					goto next;
 			}
 
@@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 	 * idle runqueue:
 	 */
 	if (rq->cfs.load.weight)
-		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+		rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
 
 	return rr_interval;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4f02b2847357..127a2c4cf4ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
 
 #include <linux/slab.h>
 
+int sched_rr_timeslice = RR_TIMESLICE;
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
@@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq)
 		return;
 
 	delta_exec = rq->clock_task - curr->se.exec_start;
-	if (unlikely((s64)delta_exec < 0))
-		delta_exec = 0;
+	if (unlikely((s64)delta_exec <= 0))
+		return;
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
-	    (p->nr_cpus_allowed > 1))
+	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
 		return 1;
 	return 0;
 }
@@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (p->on_rq && !rq->rt.rt_nr_running)
-		pull_rt_task(rq);
+	if (!p->on_rq || rq->rt.rt_nr_running)
+		return;
+
+	if (pull_rt_task(rq))
+		resched_task(rq->curr);
 }
 
 void init_sched_rt_class(void)
@@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
 
-		p->rt.timeout++;
+		if (p->rt.watchdog_stamp != jiffies) {
+			p->rt.timeout++;
+			p->rt.watchdog_stamp = jiffies;
+		}
+
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
 		return;
 
-	p->rt.time_slice = RR_TIMESLICE;
+	p->rt.time_slice = sched_rr_timeslice;
 
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	if (task->policy == SCHED_RR)
-		return RR_TIMESLICE;
+		return sched_rr_timeslice;
 	else
 		return 0;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,7 @@
 
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 3d09cf6cde75..7f82adbad480 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1632,6 +1632,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
+	cputime_t utime, stime;
 
 	BUG_ON(sig == -1);
 
@@ -1669,8 +1670,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 				       task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
@@ -1734,6 +1736,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
+	cputime_t utime, stime;
 
 	if (for_ptracer) {
 		parent = tsk->parent;
@@ -1752,8 +1755,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime);
+	info.si_stime = cputime_to_clock_t(stime);
 
 	info.si_code = why;
 	switch (why) {
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ed567babe789..f5cc25f147a6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account_irq_enter(current);
+	account_irq_enter_time(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c88878db491e..4fc9be955c71 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@
 #include <linux/kmod.h>
 #include <linux/capability.h>
 #include <linux/binfmts.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -403,6 +404,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fb8e5e469d1c..314b9ee07edf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
+
+	if (vtime_accounting_enabled())
+		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
diff --git a/kernel/timer.c b/kernel/timer.c
index ff3b5165737b..dbf7a78a1ef1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5d520b7bb4c5..c2e2c2310374 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..75aa97fbe1a1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
-
 #include "trace.h"
 
 static struct trace_array *wakeup_trace;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 625df0b44690..a1dd9a1b1327 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-	stats->ac_utime = cputime_to_usecs(tsk->utime);
-	stats->ac_stime = cputime_to_usecs(tsk->stime);
-	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
+
+	task_cputime(tsk, &utime, &stime);
+	stats->ac_utime = cputime_to_usecs(utime);
+	stats->ac_stime = cputime_to_usecs(stime);
+
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
+static void __acct_update_integrals(struct task_struct *tsk,
+				    cputime_t utime, cputime_t stime)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
 		u64 delta;
 
 		local_irq_save(flags);
-		time = tsk->stime + tsk->utime;
+		time = stime + utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -145,6 +148,27 @@ void acct_update_integrals(struct task_struct *tsk)
 }
 
 /**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	__acct_update_integrals(tsk, utime, stime);
+}
+
+/**
+ * acct_account_cputime - update mm integral after cputime update
+ * @tsk: task_struct for accounting
+ */
+void acct_account_cputime(struct task_struct *tsk)
+{
+	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
+}
+
+/**
  * acct_clear_integrals - clear the mm integral fields in task_struct
  * @tsk: task_struct whose accounting fields are cleared
  */
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 75a2ab3d0b02..27689422aa92 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>