aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/cputime.h66
-rw-r--r--include/asm-generic/cputime_jiffies.h72
-rw-r--r--include/asm-generic/cputime_nsecs.h104
-rw-r--r--include/linux/context_tracking.h28
-rw-r--r--include/linux/hardirq.h4
-rw-r--r--include/linux/init_task.h12
-rw-r--r--include/linux/kernel_stat.h2
-rw-r--r--include/linux/kvm_host.h55
-rw-r--r--include/linux/sched.h185
-rw-r--r--include/linux/sched/rt.h58
-rw-r--r--include/linux/sched/sysctl.h110
-rw-r--r--include/linux/tsacct_kern.h3
-rw-r--r--include/linux/vtime.h59
13 files changed, 524 insertions, 234 deletions
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 9a62937c56ca..51969436b8b8 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,66 +4,12 @@
4#include <linux/time.h> 4#include <linux/time.h>
5#include <linux/jiffies.h> 5#include <linux/jiffies.h>
6 6
7typedef unsigned long __nocast cputime_t; 7#ifndef CONFIG_VIRT_CPU_ACCOUNTING
8 8# include <asm-generic/cputime_jiffies.h>
9#define cputime_one_jiffy jiffies_to_cputime(1) 9#endif
10#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
11#define cputime_to_scaled(__ct) (__ct)
12#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
13
14typedef u64 __nocast cputime64_t;
15
16#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
17#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
18
19#define nsecs_to_cputime64(__ct) \
20 jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
21
22
23/*
24 * Convert cputime to microseconds and back.
25 */
26#define cputime_to_usecs(__ct) \
27 jiffies_to_usecs(cputime_to_jiffies(__ct))
28#define usecs_to_cputime(__usec) \
29 jiffies_to_cputime(usecs_to_jiffies(__usec))
30#define usecs_to_cputime64(__usec) \
31 jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
32
33/*
34 * Convert cputime to seconds and back.
35 */
36#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
37#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
38
39/*
40 * Convert cputime to timespec and back.
41 */
42#define timespec_to_cputime(__val) \
43 jiffies_to_cputime(timespec_to_jiffies(__val))
44#define cputime_to_timespec(__ct,__val) \
45 jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
46
47/*
48 * Convert cputime to timeval and back.
49 */
50#define timeval_to_cputime(__val) \
51 jiffies_to_cputime(timeval_to_jiffies(__val))
52#define cputime_to_timeval(__ct,__val) \
53 jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
54
55/*
56 * Convert cputime to clock and back.
57 */
58#define cputime_to_clock_t(__ct) \
59 jiffies_to_clock_t(cputime_to_jiffies(__ct))
60#define clock_t_to_cputime(__x) \
61 jiffies_to_cputime(clock_t_to_jiffies(__x))
62 10
63/* 11#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
64 * Convert cputime64 to clock. 12# include <asm-generic/cputime_nsecs.h>
65 */ 13#endif
66#define cputime64_to_clock_t(__ct) \
67 jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
68 14
69#endif 15#endif
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h
new file mode 100644
index 000000000000..272ecba9f588
--- /dev/null
+++ b/include/asm-generic/cputime_jiffies.h
@@ -0,0 +1,72 @@
1#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
2#define _ASM_GENERIC_CPUTIME_JIFFIES_H
3
4typedef unsigned long __nocast cputime_t;
5
6#define cputime_one_jiffy jiffies_to_cputime(1)
7#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
8#define cputime_to_scaled(__ct) (__ct)
9#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
10
11typedef u64 __nocast cputime64_t;
12
13#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
14#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
15
16
17/*
18 * Convert nanoseconds to cputime
19 */
20#define nsecs_to_cputime64(__nsec) \
21 jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
22#define nsecs_to_cputime(__nsec) \
23 jiffies_to_cputime(nsecs_to_jiffies(__nsec))
24
25
26/*
27 * Convert cputime to microseconds and back.
28 */
29#define cputime_to_usecs(__ct) \
30 jiffies_to_usecs(cputime_to_jiffies(__ct))
31#define usecs_to_cputime(__usec) \
32 jiffies_to_cputime(usecs_to_jiffies(__usec))
33#define usecs_to_cputime64(__usec) \
34 jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
35
36/*
37 * Convert cputime to seconds and back.
38 */
39#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
40#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
41
42/*
43 * Convert cputime to timespec and back.
44 */
45#define timespec_to_cputime(__val) \
46 jiffies_to_cputime(timespec_to_jiffies(__val))
47#define cputime_to_timespec(__ct,__val) \
48 jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
49
50/*
51 * Convert cputime to timeval and back.
52 */
53#define timeval_to_cputime(__val) \
54 jiffies_to_cputime(timeval_to_jiffies(__val))
55#define cputime_to_timeval(__ct,__val) \
56 jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
57
58/*
59 * Convert cputime to clock and back.
60 */
61#define cputime_to_clock_t(__ct) \
62 jiffies_to_clock_t(cputime_to_jiffies(__ct))
63#define clock_t_to_cputime(__x) \
64 jiffies_to_cputime(clock_t_to_jiffies(__x))
65
66/*
67 * Convert cputime64 to clock.
68 */
69#define cputime64_to_clock_t(__ct) \
70 jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
71
72#endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
new file mode 100644
index 000000000000..b6485cafb7bd
--- /dev/null
+++ b/include/asm-generic/cputime_nsecs.h
@@ -0,0 +1,104 @@
1/*
2 * Definitions for measuring cputime in nsecs resolution.
3 *
4 * Based on <arch/ia64/include/asm/cputime.h>
5 *
6 * Copyright (C) 2007 FUJITSU LIMITED
7 * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 *
14 */
15
16#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
17#define _ASM_GENERIC_CPUTIME_NSECS_H
18
19typedef u64 __nocast cputime_t;
20typedef u64 __nocast cputime64_t;
21
22#define cputime_one_jiffy jiffies_to_cputime(1)
23
24/*
25 * Convert cputime <-> jiffies (HZ)
26 */
27#define cputime_to_jiffies(__ct) \
28 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
29#define cputime_to_scaled(__ct) (__ct)
30#define jiffies_to_cputime(__jif) \
31 (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
32#define cputime64_to_jiffies64(__ct) \
33 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
34#define jiffies64_to_cputime64(__jif) \
35 (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
36
37
38/*
39 * Convert cputime <-> nanoseconds
40 */
41#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
42
43
44/*
45 * Convert cputime <-> microseconds
46 */
47#define cputime_to_usecs(__ct) \
48 ((__force u64)(__ct) / NSEC_PER_USEC)
49#define usecs_to_cputime(__usecs) \
50 (__force cputime_t)((__usecs) * NSEC_PER_USEC)
51#define usecs_to_cputime64(__usecs) \
52 (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
53
54/*
55 * Convert cputime <-> seconds
56 */
57#define cputime_to_secs(__ct) \
58 ((__force u64)(__ct) / NSEC_PER_SEC)
59#define secs_to_cputime(__secs) \
60 (__force cputime_t)((__secs) * NSEC_PER_SEC)
61
62/*
63 * Convert cputime <-> timespec (nsec)
64 */
65static inline cputime_t timespec_to_cputime(const struct timespec *val)
66{
67 u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
68 return (__force cputime_t) ret;
69}
70static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
71{
72 val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
73 val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
74}
75
76/*
77 * Convert cputime <-> timeval (msec)
78 */
79static inline cputime_t timeval_to_cputime(struct timeval *val)
80{
81 u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
82 return (__force cputime_t) ret;
83}
84static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
85{
86 val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
87 val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
88}
89
90/*
91 * Convert cputime <-> clock (USER_HZ)
92 */
93#define cputime_to_clock_t(__ct) \
94 ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
95#define clock_t_to_cputime(__x) \
96 (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
97
98/*
99 * Convert cputime64 to clock.
100 */
101#define cputime64_to_clock_t(__ct) \
102 cputime_to_clock_t((__force cputime_t)__ct)
103
104#endif
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index e24339ccb7f0..b28d161c1091 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,12 +3,40 @@
3 3
4#ifdef CONFIG_CONTEXT_TRACKING 4#ifdef CONFIG_CONTEXT_TRACKING
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/percpu.h>
7
8struct context_tracking {
9 /*
10 * When active is false, probes are unset in order
11 * to minimize overhead: TIF flags are cleared
12 * and calls to user_enter/exit are ignored. This
13 * may be further optimized using static keys.
14 */
15 bool active;
16 enum {
17 IN_KERNEL = 0,
18 IN_USER,
19 } state;
20};
21
22DECLARE_PER_CPU(struct context_tracking, context_tracking);
23
24static inline bool context_tracking_in_user(void)
25{
26 return __this_cpu_read(context_tracking.state) == IN_USER;
27}
28
29static inline bool context_tracking_active(void)
30{
31 return __this_cpu_read(context_tracking.active);
32}
6 33
7extern void user_enter(void); 34extern void user_enter(void);
8extern void user_exit(void); 35extern void user_exit(void);
9extern void context_tracking_task_switch(struct task_struct *prev, 36extern void context_tracking_task_switch(struct task_struct *prev,
10 struct task_struct *next); 37 struct task_struct *next);
11#else 38#else
39static inline bool context_tracking_in_user(void) { return false; }
12static inline void user_enter(void) { } 40static inline void user_enter(void) { }
13static inline void user_exit(void) { } 41static inline void user_exit(void) { }
14static inline void context_tracking_task_switch(struct task_struct *prev, 42static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 57bfdce8fb90..29eb805ea4a6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
153 */ 153 */
154#define __irq_enter() \ 154#define __irq_enter() \
155 do { \ 155 do { \
156 vtime_account_irq_enter(current); \ 156 account_irq_enter_time(current); \
157 add_preempt_count(HARDIRQ_OFFSET); \ 157 add_preempt_count(HARDIRQ_OFFSET); \
158 trace_hardirq_enter(); \ 158 trace_hardirq_enter(); \
159 } while (0) 159 } while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
169#define __irq_exit() \ 169#define __irq_exit() \
170 do { \ 170 do { \
171 trace_hardirq_exit(); \ 171 trace_hardirq_exit(); \
172 vtime_account_irq_exit(current); \ 172 account_irq_exit_time(current); \
173 sub_preempt_count(HARDIRQ_OFFSET); \ 173 sub_preempt_count(HARDIRQ_OFFSET); \
174 } while (0) 174 } while (0)
175 175
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5f57f7..5cd0f0949927 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,7 +10,9 @@
10#include <linux/pid_namespace.h> 10#include <linux/pid_namespace.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/securebits.h> 12#include <linux/securebits.h>
13#include <linux/seqlock.h>
13#include <net/net_namespace.h> 14#include <net/net_namespace.h>
15#include <linux/sched/rt.h>
14 16
15#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
16# define INIT_PUSHABLE_TASKS(tsk) \ 18# define INIT_PUSHABLE_TASKS(tsk) \
@@ -141,6 +143,15 @@ extern struct task_group root_task_group;
141# define INIT_PERF_EVENTS(tsk) 143# define INIT_PERF_EVENTS(tsk)
142#endif 144#endif
143 145
146#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
147# define INIT_VTIME(tsk) \
148 .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
149 .vtime_snap = 0, \
150 .vtime_snap_whence = VTIME_SYS,
151#else
152# define INIT_VTIME(tsk)
153#endif
154
144#define INIT_TASK_COMM "swapper" 155#define INIT_TASK_COMM "swapper"
145 156
146/* 157/*
@@ -210,6 +221,7 @@ extern struct task_group root_task_group;
210 INIT_TRACE_RECURSION \ 221 INIT_TRACE_RECURSION \
211 INIT_TASK_RCU_PREEMPT(tsk) \ 222 INIT_TASK_RCU_PREEMPT(tsk) \
212 INIT_CPUSET_SEQ \ 223 INIT_CPUSET_SEQ \
224 INIT_VTIME(tsk) \
213} 225}
214 226
215 227
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910..ed5f6ed6eb77 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
127extern void account_steal_time(cputime_t); 127extern void account_steal_time(cputime_t);
128extern void account_idle_time(cputime_t); 128extern void account_idle_time(cputime_t);
129 129
130#ifdef CONFIG_VIRT_CPU_ACCOUNTING 130#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
131static inline void account_process_tick(struct task_struct *tsk, int user) 131static inline void account_process_tick(struct task_struct *tsk, int user)
132{ 132{
133 vtime_account_user(tsk); 133 vtime_account_user(tsk);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c497ab0d03d..b7996a768eb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -22,6 +22,7 @@
22#include <linux/rcupdate.h> 22#include <linux/rcupdate.h>
23#include <linux/ratelimit.h> 23#include <linux/ratelimit.h>
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/irqflags.h>
25#include <asm/signal.h> 26#include <asm/signal.h>
26 27
27#include <linux/kvm.h> 28#include <linux/kvm.h>
@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
740} 741}
741#endif /* CONFIG_IOMMU_API */ 742#endif /* CONFIG_IOMMU_API */
742 743
743static inline void kvm_guest_enter(void) 744static inline void __guest_enter(void)
744{ 745{
745 BUG_ON(preemptible());
746 /* 746 /*
747 * This is running in ioctl context so we can avoid 747 * This is running in ioctl context so we can avoid
748 * the call to vtime_account() with its unnecessary idle check. 748 * the call to vtime_account() with its unnecessary idle check.
749 */ 749 */
750 vtime_account_system_irqsafe(current); 750 vtime_account_system(current);
751 current->flags |= PF_VCPU; 751 current->flags |= PF_VCPU;
752}
753
754static inline void __guest_exit(void)
755{
756 /*
757 * This is running in ioctl context so we can avoid
758 * the call to vtime_account() with its unnecessary idle check.
759 */
760 vtime_account_system(current);
761 current->flags &= ~PF_VCPU;
762}
763
764#ifdef CONFIG_CONTEXT_TRACKING
765extern void guest_enter(void);
766extern void guest_exit(void);
767
768#else /* !CONFIG_CONTEXT_TRACKING */
769static inline void guest_enter(void)
770{
771 __guest_enter();
772}
773
774static inline void guest_exit(void)
775{
776 __guest_exit();
777}
778#endif /* !CONFIG_CONTEXT_TRACKING */
779
780static inline void kvm_guest_enter(void)
781{
782 unsigned long flags;
783
784 BUG_ON(preemptible());
785
786 local_irq_save(flags);
787 guest_enter();
788 local_irq_restore(flags);
789
752 /* KVM does not hold any references to rcu protected data when it 790 /* KVM does not hold any references to rcu protected data when it
753 * switches CPU into a guest mode. In fact switching to a guest mode 791 * switches CPU into a guest mode. In fact switching to a guest mode
754 * is very similar to exiting to userspase from rcu point of view. In 792 * is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
761 799
762static inline void kvm_guest_exit(void) 800static inline void kvm_guest_exit(void)
763{ 801{
764 /* 802 unsigned long flags;
765 * This is running in ioctl context so we can avoid 803
766 * the call to vtime_account() with its unnecessary idle check. 804 local_irq_save(flags);
767 */ 805 guest_exit();
768 vtime_account_system_irqsafe(current); 806 local_irq_restore(flags);
769 current->flags &= ~PF_VCPU;
770} 807}
771 808
772/* 809/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..33cc42130371 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
304} 304}
305#endif 305#endif
306 306
307#ifdef CONFIG_DETECT_HUNG_TASK
308extern unsigned int sysctl_hung_task_panic;
309extern unsigned long sysctl_hung_task_check_count;
310extern unsigned long sysctl_hung_task_timeout_secs;
311extern unsigned long sysctl_hung_task_warnings;
312extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
313 void __user *buffer,
314 size_t *lenp, loff_t *ppos);
315#else
316/* Avoid need for ifdefs elsewhere in the code */
317enum { sysctl_hung_task_timeout_secs = 0 };
318#endif
319
320/* Attach to any functions which should be ignored in wchan output. */ 307/* Attach to any functions which should be ignored in wchan output. */
321#define __sched __attribute__((__section__(".sched.text"))) 308#define __sched __attribute__((__section__(".sched.text")))
322 309
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
338struct nsproxy; 325struct nsproxy;
339struct user_namespace; 326struct user_namespace;
340 327
341/*
342 * Default maximum number of active map areas, this limits the number of vmas
343 * per mm struct. Users can overwrite this number by sysctl but there is a
344 * problem.
345 *
346 * When a program's coredump is generated as ELF format, a section is created
347 * per a vma. In ELF, the number of sections is represented in unsigned short.
348 * This means the number of sections should be smaller than 65535 at coredump.
349 * Because the kernel adds some informative sections to a image of program at
350 * generating coredump, we need some margin. The number of extra sections is
351 * 1-3 now and depends on arch. We use "5" as safe margin, here.
352 */
353#define MAPCOUNT_ELF_CORE_MARGIN (5)
354#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
355
356extern int sysctl_max_map_count;
357
358#include <linux/aio.h> 328#include <linux/aio.h>
359 329
360#ifdef CONFIG_MMU 330#ifdef CONFIG_MMU
@@ -1194,6 +1164,7 @@ struct sched_entity {
1194 /* rq "owned" by this entity/group: */ 1164 /* rq "owned" by this entity/group: */
1195 struct cfs_rq *my_q; 1165 struct cfs_rq *my_q;
1196#endif 1166#endif
1167
1197/* 1168/*
1198 * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be 1169 * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
1199 * removed when useful for applications beyond shares distribution (e.g. 1170 * removed when useful for applications beyond shares distribution (e.g.
@@ -1208,6 +1179,7 @@ struct sched_entity {
1208struct sched_rt_entity { 1179struct sched_rt_entity {
1209 struct list_head run_list; 1180 struct list_head run_list;
1210 unsigned long timeout; 1181 unsigned long timeout;
1182 unsigned long watchdog_stamp;
1211 unsigned int time_slice; 1183 unsigned int time_slice;
1212 1184
1213 struct sched_rt_entity *back; 1185 struct sched_rt_entity *back;
@@ -1220,11 +1192,6 @@ struct sched_rt_entity {
1220#endif 1192#endif
1221}; 1193};
1222 1194
1223/*
1224 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
1225 * Timeslices get refilled after they expire.
1226 */
1227#define RR_TIMESLICE (100 * HZ / 1000)
1228 1195
1229struct rcu_node; 1196struct rcu_node;
1230 1197
@@ -1368,6 +1335,15 @@ struct task_struct {
1368#ifndef CONFIG_VIRT_CPU_ACCOUNTING 1335#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1369 struct cputime prev_cputime; 1336 struct cputime prev_cputime;
1370#endif 1337#endif
1338#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1339 seqlock_t vtime_seqlock;
1340 unsigned long long vtime_snap;
1341 enum {
1342 VTIME_SLEEPING = 0,
1343 VTIME_USER,
1344 VTIME_SYS,
1345 } vtime_snap_whence;
1346#endif
1371 unsigned long nvcsw, nivcsw; /* context switch counts */ 1347 unsigned long nvcsw, nivcsw; /* context switch counts */
1372 struct timespec start_time; /* monotonic time */ 1348 struct timespec start_time; /* monotonic time */
1373 struct timespec real_start_time; /* boot based time */ 1349 struct timespec real_start_time; /* boot based time */
@@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
1622} 1598}
1623#endif 1599#endif
1624 1600
1625/*
1626 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
1627 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
1628 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
1629 * values are inverted: lower p->prio value means higher priority.
1630 *
1631 * The MAX_USER_RT_PRIO value allows the actual maximum
1632 * RT priority to be separate from the value exported to
1633 * user-space. This allows kernel threads to set their
1634 * priority to a value higher than any user task. Note:
1635 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
1636 */
1637
1638#define MAX_USER_RT_PRIO 100
1639#define MAX_RT_PRIO MAX_USER_RT_PRIO
1640
1641#define MAX_PRIO (MAX_RT_PRIO + 40)
1642#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
1643
1644static inline int rt_prio(int prio)
1645{
1646 if (unlikely(prio < MAX_RT_PRIO))
1647 return 1;
1648 return 0;
1649}
1650
1651static inline int rt_task(struct task_struct *p)
1652{
1653 return rt_prio(p->prio);
1654}
1655
1656static inline struct pid *task_pid(struct task_struct *task) 1601static inline struct pid *task_pid(struct task_struct *task)
1657{ 1602{
1658 return task->pids[PIDTYPE_PID].pid; 1603 return task->pids[PIDTYPE_PID].pid;
@@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
1792 __put_task_struct(t); 1737 __put_task_struct(t);
1793} 1738}
1794 1739
1740#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1741extern void task_cputime(struct task_struct *t,
1742 cputime_t *utime, cputime_t *stime);
1743extern void task_cputime_scaled(struct task_struct *t,
1744 cputime_t *utimescaled, cputime_t *stimescaled);
1745extern cputime_t task_gtime(struct task_struct *t);
1746#else
1747static inline void task_cputime(struct task_struct *t,
1748 cputime_t *utime, cputime_t *stime)
1749{
1750 if (utime)
1751 *utime = t->utime;
1752 if (stime)
1753 *stime = t->stime;
1754}
1755
1756static inline void task_cputime_scaled(struct task_struct *t,
1757 cputime_t *utimescaled,
1758 cputime_t *stimescaled)
1759{
1760 if (utimescaled)
1761 *utimescaled = t->utimescaled;
1762 if (stimescaled)
1763 *stimescaled = t->stimescaled;
1764}
1765
1766static inline cputime_t task_gtime(struct task_struct *t)
1767{
1768 return t->gtime;
1769}
1770#endif
1795extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); 1771extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
1796extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); 1772extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
1797 1773
@@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
2033static inline void wake_up_idle_cpu(int cpu) { } 2009static inline void wake_up_idle_cpu(int cpu) { }
2034#endif 2010#endif
2035 2011
2036extern unsigned int sysctl_sched_latency;
2037extern unsigned int sysctl_sched_min_granularity;
2038extern unsigned int sysctl_sched_wakeup_granularity;
2039extern unsigned int sysctl_sched_child_runs_first;
2040
2041enum sched_tunable_scaling {
2042 SCHED_TUNABLESCALING_NONE,
2043 SCHED_TUNABLESCALING_LOG,
2044 SCHED_TUNABLESCALING_LINEAR,
2045 SCHED_TUNABLESCALING_END,
2046};
2047extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
2048
2049extern unsigned int sysctl_numa_balancing_scan_delay;
2050extern unsigned int sysctl_numa_balancing_scan_period_min;
2051extern unsigned int sysctl_numa_balancing_scan_period_max;
2052extern unsigned int sysctl_numa_balancing_scan_period_reset;
2053extern unsigned int sysctl_numa_balancing_scan_size;
2054extern unsigned int sysctl_numa_balancing_settle_count;
2055
2056#ifdef CONFIG_SCHED_DEBUG
2057extern unsigned int sysctl_sched_migration_cost;
2058extern unsigned int sysctl_sched_nr_migrate;
2059extern unsigned int sysctl_sched_time_avg;
2060extern unsigned int sysctl_timer_migration;
2061extern unsigned int sysctl_sched_shares_window;
2062
2063int sched_proc_update_handler(struct ctl_table *table, int write,
2064 void __user *buffer, size_t *length,
2065 loff_t *ppos);
2066#endif
2067#ifdef CONFIG_SCHED_DEBUG
2068static inline unsigned int get_sysctl_timer_migration(void)
2069{
2070 return sysctl_timer_migration;
2071}
2072#else
2073static inline unsigned int get_sysctl_timer_migration(void)
2074{
2075 return 1;
2076}
2077#endif
2078extern unsigned int sysctl_sched_rt_period;
2079extern int sysctl_sched_rt_runtime;
2080
2081int sched_rt_handler(struct ctl_table *table, int write,
2082 void __user *buffer, size_t *lenp,
2083 loff_t *ppos);
2084
2085#ifdef CONFIG_SCHED_AUTOGROUP 2012#ifdef CONFIG_SCHED_AUTOGROUP
2086extern unsigned int sysctl_sched_autogroup_enabled;
2087
2088extern void sched_autogroup_create_attach(struct task_struct *p); 2013extern void sched_autogroup_create_attach(struct task_struct *p);
2089extern void sched_autogroup_detach(struct task_struct *p); 2014extern void sched_autogroup_detach(struct task_struct *p);
2090extern void sched_autogroup_fork(struct signal_struct *sig); 2015extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
2100static inline void sched_autogroup_exit(struct signal_struct *sig) { } 2025static inline void sched_autogroup_exit(struct signal_struct *sig) { }
2101#endif 2026#endif
2102 2027
2103#ifdef CONFIG_CFS_BANDWIDTH
2104extern unsigned int sysctl_sched_cfs_bandwidth_slice;
2105#endif
2106
2107#ifdef CONFIG_RT_MUTEXES
2108extern int rt_mutex_getprio(struct task_struct *p);
2109extern void rt_mutex_setprio(struct task_struct *p, int prio);
2110extern void rt_mutex_adjust_pi(struct task_struct *p);
2111static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2112{
2113 return tsk->pi_blocked_on != NULL;
2114}
2115#else
2116static inline int rt_mutex_getprio(struct task_struct *p)
2117{
2118 return p->normal_prio;
2119}
2120# define rt_mutex_adjust_pi(p) do { } while (0)
2121static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2122{
2123 return false;
2124}
2125#endif
2126
2127extern bool yield_to(struct task_struct *p, bool preempt); 2028extern bool yield_to(struct task_struct *p, bool preempt);
2128extern void set_user_nice(struct task_struct *p, long nice); 2029extern void set_user_nice(struct task_struct *p, long nice);
2129extern int task_prio(const struct task_struct *p); 2030extern int task_prio(const struct task_struct *p);
@@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
2753extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); 2654extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2754extern long sched_getaffinity(pid_t pid, struct cpumask *mask); 2655extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2755 2656
2756extern void normalize_rt_tasks(void);
2757
2758#ifdef CONFIG_CGROUP_SCHED 2657#ifdef CONFIG_CGROUP_SCHED
2759 2658
2760extern struct task_group root_task_group; 2659extern struct task_group root_task_group;
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644
index 000000000000..94e19ea28fc3
--- /dev/null
+++ b/include/linux/sched/rt.h
@@ -0,0 +1,58 @@
1#ifndef _SCHED_RT_H
2#define _SCHED_RT_H
3
4/*
5 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
6 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
7 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
8 * values are inverted: lower p->prio value means higher priority.
9 *
10 * The MAX_USER_RT_PRIO value allows the actual maximum
11 * RT priority to be separate from the value exported to
12 * user-space. This allows kernel threads to set their
13 * priority to a value higher than any user task. Note:
14 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
15 */
16
17#define MAX_USER_RT_PRIO 100
18#define MAX_RT_PRIO MAX_USER_RT_PRIO
19
20#define MAX_PRIO (MAX_RT_PRIO + 40)
21#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
22
23static inline int rt_prio(int prio)
24{
25 if (unlikely(prio < MAX_RT_PRIO))
26 return 1;
27 return 0;
28}
29
30static inline int rt_task(struct task_struct *p)
31{
32 return rt_prio(p->prio);
33}
34
35#ifdef CONFIG_RT_MUTEXES
36extern int rt_mutex_getprio(struct task_struct *p);
37extern void rt_mutex_setprio(struct task_struct *p, int prio);
38extern void rt_mutex_adjust_pi(struct task_struct *p);
39static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
40{
41 return tsk->pi_blocked_on != NULL;
42}
43#else
44static inline int rt_mutex_getprio(struct task_struct *p)
45{
46 return p->normal_prio;
47}
48# define rt_mutex_adjust_pi(p) do { } while (0)
49static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
50{
51 return false;
52}
53#endif
54
55extern void normalize_rt_tasks(void);
56
57
58#endif /* _SCHED_RT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
new file mode 100644
index 000000000000..d2bb0ae979d0
--- /dev/null
+++ b/include/linux/sched/sysctl.h
@@ -0,0 +1,110 @@
1#ifndef _SCHED_SYSCTL_H
2#define _SCHED_SYSCTL_H
3
4#ifdef CONFIG_DETECT_HUNG_TASK
5extern unsigned int sysctl_hung_task_panic;
6extern unsigned long sysctl_hung_task_check_count;
7extern unsigned long sysctl_hung_task_timeout_secs;
8extern unsigned long sysctl_hung_task_warnings;
9extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
10 void __user *buffer,
11 size_t *lenp, loff_t *ppos);
12#else
13/* Avoid need for ifdefs elsewhere in the code */
14enum { sysctl_hung_task_timeout_secs = 0 };
15#endif
16
17/*
18 * Default maximum number of active map areas, this limits the number of vmas
19 * per mm struct. Users can overwrite this number by sysctl but there is a
20 * problem.
21 *
22 * When a program's coredump is generated as ELF format, a section is created
23 * per a vma. In ELF, the number of sections is represented in unsigned short.
24 * This means the number of sections should be smaller than 65535 at coredump.
25 * Because the kernel adds some informative sections to a image of program at
26 * generating coredump, we need some margin. The number of extra sections is
27 * 1-3 now and depends on arch. We use "5" as safe margin, here.
28 */
29#define MAPCOUNT_ELF_CORE_MARGIN (5)
30#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
31
32extern int sysctl_max_map_count;
33
34extern unsigned int sysctl_sched_latency;
35extern unsigned int sysctl_sched_min_granularity;
36extern unsigned int sysctl_sched_wakeup_granularity;
37extern unsigned int sysctl_sched_child_runs_first;
38
39enum sched_tunable_scaling {
40 SCHED_TUNABLESCALING_NONE,
41 SCHED_TUNABLESCALING_LOG,
42 SCHED_TUNABLESCALING_LINEAR,
43 SCHED_TUNABLESCALING_END,
44};
45extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
46
47extern unsigned int sysctl_numa_balancing_scan_delay;
48extern unsigned int sysctl_numa_balancing_scan_period_min;
49extern unsigned int sysctl_numa_balancing_scan_period_max;
50extern unsigned int sysctl_numa_balancing_scan_period_reset;
51extern unsigned int sysctl_numa_balancing_scan_size;
52extern unsigned int sysctl_numa_balancing_settle_count;
53
54#ifdef CONFIG_SCHED_DEBUG
55extern unsigned int sysctl_sched_migration_cost;
56extern unsigned int sysctl_sched_nr_migrate;
57extern unsigned int sysctl_sched_time_avg;
58extern unsigned int sysctl_timer_migration;
59extern unsigned int sysctl_sched_shares_window;
60
61int sched_proc_update_handler(struct ctl_table *table, int write,
62 void __user *buffer, size_t *length,
63 loff_t *ppos);
64#endif
65#ifdef CONFIG_SCHED_DEBUG
66static inline unsigned int get_sysctl_timer_migration(void)
67{
68 return sysctl_timer_migration;
69}
70#else
71static inline unsigned int get_sysctl_timer_migration(void)
72{
73 return 1;
74}
75#endif
76
77/*
78 * control realtime throttling:
79 *
80 * /proc/sys/kernel/sched_rt_period_us
81 * /proc/sys/kernel/sched_rt_runtime_us
82 */
83extern unsigned int sysctl_sched_rt_period;
84extern int sysctl_sched_rt_runtime;
85
86#ifdef CONFIG_CFS_BANDWIDTH
87extern unsigned int sysctl_sched_cfs_bandwidth_slice;
88#endif
89
90#ifdef CONFIG_SCHED_AUTOGROUP
91extern unsigned int sysctl_sched_autogroup_enabled;
92#endif
93
94/*
95 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
96 * Timeslices get refilled after they expire.
97 */
98#define RR_TIMESLICE (100 * HZ / 1000)
99
100extern int sched_rr_timeslice;
101
102extern int sched_rr_handler(struct ctl_table *table, int write,
103 void __user *buffer, size_t *lenp,
104 loff_t *ppos);
105
106extern int sched_rt_handler(struct ctl_table *table, int write,
107 void __user *buffer, size_t *lenp,
108 loff_t *ppos);
109
110#endif /* _SCHED_SYSCTL_H */
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 44893e5ec8f7..3251965bf4cc 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
23#ifdef CONFIG_TASK_XACCT 23#ifdef CONFIG_TASK_XACCT
24extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); 24extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
25extern void acct_update_integrals(struct task_struct *tsk); 25extern void acct_update_integrals(struct task_struct *tsk);
26extern void acct_account_cputime(struct task_struct *tsk);
26extern void acct_clear_integrals(struct task_struct *tsk); 27extern void acct_clear_integrals(struct task_struct *tsk);
27#else 28#else
28static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) 29static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
29{} 30{}
30static inline void acct_update_integrals(struct task_struct *tsk) 31static inline void acct_update_integrals(struct task_struct *tsk)
31{} 32{}
33static inline void acct_account_cputime(struct task_struct *tsk)
34{}
32static inline void acct_clear_integrals(struct task_struct *tsk) 35static inline void acct_clear_integrals(struct task_struct *tsk)
33{} 36{}
34#endif /* CONFIG_TASK_XACCT */ 37#endif /* CONFIG_TASK_XACCT */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431a..71a5782d8c59 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,15 +6,46 @@ struct task_struct;
6#ifdef CONFIG_VIRT_CPU_ACCOUNTING 6#ifdef CONFIG_VIRT_CPU_ACCOUNTING
7extern void vtime_task_switch(struct task_struct *prev); 7extern void vtime_task_switch(struct task_struct *prev);
8extern void vtime_account_system(struct task_struct *tsk); 8extern void vtime_account_system(struct task_struct *tsk);
9extern void vtime_account_system_irqsafe(struct task_struct *tsk);
10extern void vtime_account_idle(struct task_struct *tsk); 9extern void vtime_account_idle(struct task_struct *tsk);
11extern void vtime_account_user(struct task_struct *tsk); 10extern void vtime_account_user(struct task_struct *tsk);
12extern void vtime_account(struct task_struct *tsk); 11extern void vtime_account_irq_enter(struct task_struct *tsk);
13#else 12
13#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
14static inline bool vtime_accounting_enabled(void) { return true; }
15#endif
16
17#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
18
14static inline void vtime_task_switch(struct task_struct *prev) { } 19static inline void vtime_task_switch(struct task_struct *prev) { }
15static inline void vtime_account_system(struct task_struct *tsk) { } 20static inline void vtime_account_system(struct task_struct *tsk) { }
16static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } 21static inline void vtime_account_user(struct task_struct *tsk) { }
17static inline void vtime_account(struct task_struct *tsk) { } 22static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
23static inline bool vtime_accounting_enabled(void) { return false; }
24#endif
25
26#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
27extern void arch_vtime_task_switch(struct task_struct *tsk);
28extern void vtime_account_irq_exit(struct task_struct *tsk);
29extern bool vtime_accounting_enabled(void);
30extern void vtime_user_enter(struct task_struct *tsk);
31static inline void vtime_user_exit(struct task_struct *tsk)
32{
33 vtime_account_user(tsk);
34}
35extern void vtime_guest_enter(struct task_struct *tsk);
36extern void vtime_guest_exit(struct task_struct *tsk);
37extern void vtime_init_idle(struct task_struct *tsk);
38#else
39static inline void vtime_account_irq_exit(struct task_struct *tsk)
40{
41 /* On hard|softirq exit we always account to hard|softirq cputime */
42 vtime_account_system(tsk);
43}
44static inline void vtime_user_enter(struct task_struct *tsk) { }
45static inline void vtime_user_exit(struct task_struct *tsk) { }
46static inline void vtime_guest_enter(struct task_struct *tsk) { }
47static inline void vtime_guest_exit(struct task_struct *tsk) { }
48static inline void vtime_init_idle(struct task_struct *tsk) { }
18#endif 49#endif
19 50
20#ifdef CONFIG_IRQ_TIME_ACCOUNTING 51#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
23static inline void irqtime_account_irq(struct task_struct *tsk) { } 54static inline void irqtime_account_irq(struct task_struct *tsk) { }
24#endif 55#endif
25 56
26static inline void vtime_account_irq_enter(struct task_struct *tsk) 57static inline void account_irq_enter_time(struct task_struct *tsk)
27{ 58{
28 /* 59 vtime_account_irq_enter(tsk);
29 * Hardirq can interrupt idle task anytime. So we need vtime_account()
30 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
31 * Softirq can also interrupt idle task directly if it calls
32 * local_bh_enable(). Such case probably don't exist but we never know.
33 * Ksoftirqd is not concerned because idle time is flushed on context
34 * switch. Softirqs in the end of hardirqs are also not a problem because
35 * the idle time is flushed on hardirq time already.
36 */
37 vtime_account(tsk);
38 irqtime_account_irq(tsk); 60 irqtime_account_irq(tsk);
39} 61}
40 62
41static inline void vtime_account_irq_exit(struct task_struct *tsk) 63static inline void account_irq_exit_time(struct task_struct *tsk)
42{ 64{
43 /* On hard|softirq exit we always account to hard|softirq cputime */ 65 vtime_account_irq_exit(tsk);
44 vtime_account_system(tsk);
45 irqtime_account_irq(tsk); 66 irqtime_account_irq(tsk);
46} 67}
47 68