aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-11 16:23:18 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-11 16:23:18 -0400
commit774a694f8cd08115d130a290d73c6d8563f26b1b (patch)
tree2b5f834ac7a149278d2a7e44d7afe69f40ef1431 /include
parent4f0ac854167846bd55cd81dbc9a36e03708aa01c (diff)
parente1f8450854d69f0291882804406ea1bab3ca44b4 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (64 commits) sched: Fix sched::sched_stat_wait tracepoint field sched: Disable NEW_FAIR_SLEEPERS for now sched: Keep kthreads at default priority sched: Re-tune the scheduler latency defaults to decrease worst-case latencies sched: Turn off child_runs_first sched: Ensure that a child can't gain time over it's parent after fork() sched: enable SD_WAKE_IDLE sched: Deal with low-load in wake_affine() sched: Remove short cut from select_task_rq_fair() sched: Turn on SD_BALANCE_NEWIDLE sched: Clean up topology.h sched: Fix dynamic power-balancing crash sched: Remove reciprocal for cpu_power sched: Try to deal with low capacity, fix update_sd_power_savings_stats() sched: Try to deal with low capacity sched: Scale down cpu_power due to RT tasks sched: Implement dynamic cpu_power sched: Add smt_gain sched: Update the cpu_power sum during load-balance sched: Add SD_PREFER_SIBLING ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/hardirq.h6
-rw-r--r--include/linux/kernel.h5
-rw-r--r--include/linux/sched.h94
-rw-r--r--include/linux/topology.h168
-rw-r--r--include/trace/events/sched.h95
5 files changed, 260 insertions, 108 deletions
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 330cb31bb496..6d527ee82b2b 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -64,6 +64,12 @@
64#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) 64#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
65#define NMI_OFFSET (1UL << NMI_SHIFT) 65#define NMI_OFFSET (1UL << NMI_SHIFT)
66 66
67#ifndef PREEMPT_ACTIVE
68#define PREEMPT_ACTIVE_BITS 1
69#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
70#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
71#endif
72
67#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) 73#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
68#error PREEMPT_ACTIVE is too low! 74#error PREEMPT_ACTIVE is too low!
69#endif 75#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d6320a3e8def..2b5b1e0899a8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -125,7 +125,7 @@ extern int _cond_resched(void);
125#endif 125#endif
126 126
127#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 127#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
128 void __might_sleep(char *file, int line); 128 void __might_sleep(char *file, int line, int preempt_offset);
129/** 129/**
130 * might_sleep - annotation for functions that can sleep 130 * might_sleep - annotation for functions that can sleep
131 * 131 *
@@ -137,8 +137,9 @@ extern int _cond_resched(void);
137 * supposed to. 137 * supposed to.
138 */ 138 */
139# define might_sleep() \ 139# define might_sleep() \
140 do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) 140 do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
141#else 141#else
142 static inline void __might_sleep(char *file, int line, int preempt_offset) { }
142# define might_sleep() do { might_resched(); } while (0) 143# define might_sleep() do { might_resched(); } while (0)
143#endif 144#endif
144 145
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 379531c08975..f3d74bd04d18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,8 @@
38#define SCHED_BATCH 3 38#define SCHED_BATCH 3
39/* SCHED_ISO: reserved but not implemented yet */ 39/* SCHED_ISO: reserved but not implemented yet */
40#define SCHED_IDLE 5 40#define SCHED_IDLE 5
41/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
42#define SCHED_RESET_ON_FORK 0x40000000
41 43
42#ifdef __KERNEL__ 44#ifdef __KERNEL__
43 45
@@ -796,18 +798,19 @@ enum cpu_idle_type {
796#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE 798#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE
797 799
798#ifdef CONFIG_SMP 800#ifdef CONFIG_SMP
799#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */ 801#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
800#define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */ 802#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
801#define SD_BALANCE_EXEC 4 /* Balance on exec */ 803#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
802#define SD_BALANCE_FORK 8 /* Balance on fork, clone */ 804#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
803#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */ 805#define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */
804#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */ 806#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
805#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */ 807#define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */
806#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ 808#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
807#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ 809#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
808#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ 810#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
809#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ 811#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
810#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ 812#define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */
813#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
811 814
812enum powersavings_balance_level { 815enum powersavings_balance_level {
813 POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ 816 POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
@@ -827,7 +830,7 @@ static inline int sd_balance_for_mc_power(void)
827 if (sched_smt_power_savings) 830 if (sched_smt_power_savings)
828 return SD_POWERSAVINGS_BALANCE; 831 return SD_POWERSAVINGS_BALANCE;
829 832
830 return 0; 833 return SD_PREFER_SIBLING;
831} 834}
832 835
833static inline int sd_balance_for_package_power(void) 836static inline int sd_balance_for_package_power(void)
@@ -835,7 +838,7 @@ static inline int sd_balance_for_package_power(void)
835 if (sched_mc_power_savings | sched_smt_power_savings) 838 if (sched_mc_power_savings | sched_smt_power_savings)
836 return SD_POWERSAVINGS_BALANCE; 839 return SD_POWERSAVINGS_BALANCE;
837 840
838 return 0; 841 return SD_PREFER_SIBLING;
839} 842}
840 843
841/* 844/*
@@ -857,15 +860,9 @@ struct sched_group {
857 860
858 /* 861 /*
859 * CPU power of this group, SCHED_LOAD_SCALE being max power for a 862 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
860 * single CPU. This is read only (except for setup, hotplug CPU). 863 * single CPU.
861 * Note : Never change cpu_power without recompute its reciprocal
862 */
863 unsigned int __cpu_power;
864 /*
865 * reciprocal value of cpu_power to avoid expensive divides
866 * (see include/linux/reciprocal_div.h)
867 */ 864 */
868 u32 reciprocal_cpu_power; 865 unsigned int cpu_power;
869 866
870 /* 867 /*
871 * The CPUs this group covers. 868 * The CPUs this group covers.
@@ -918,6 +915,7 @@ struct sched_domain {
918 unsigned int newidle_idx; 915 unsigned int newidle_idx;
919 unsigned int wake_idx; 916 unsigned int wake_idx;
920 unsigned int forkexec_idx; 917 unsigned int forkexec_idx;
918 unsigned int smt_gain;
921 int flags; /* See SD_* */ 919 int flags; /* See SD_* */
922 enum sched_domain_level level; 920 enum sched_domain_level level;
923 921
@@ -1045,7 +1043,6 @@ struct sched_class {
1045 struct rq *busiest, struct sched_domain *sd, 1043 struct rq *busiest, struct sched_domain *sd,
1046 enum cpu_idle_type idle); 1044 enum cpu_idle_type idle);
1047 void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); 1045 void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
1048 int (*needs_post_schedule) (struct rq *this_rq);
1049 void (*post_schedule) (struct rq *this_rq); 1046 void (*post_schedule) (struct rq *this_rq);
1050 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); 1047 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
1051 1048
@@ -1110,6 +1107,8 @@ struct sched_entity {
1110 u64 wait_max; 1107 u64 wait_max;
1111 u64 wait_count; 1108 u64 wait_count;
1112 u64 wait_sum; 1109 u64 wait_sum;
1110 u64 iowait_count;
1111 u64 iowait_sum;
1113 1112
1114 u64 sleep_start; 1113 u64 sleep_start;
1115 u64 sleep_max; 1114 u64 sleep_max;
@@ -1234,11 +1233,19 @@ struct task_struct {
1234 unsigned did_exec:1; 1233 unsigned did_exec:1;
1235 unsigned in_execve:1; /* Tell the LSMs that the process is doing an 1234 unsigned in_execve:1; /* Tell the LSMs that the process is doing an
1236 * execve */ 1235 * execve */
1236 unsigned in_iowait:1;
1237
1238
1239 /* Revert to default priority/policy when forking */
1240 unsigned sched_reset_on_fork:1;
1241
1237 pid_t pid; 1242 pid_t pid;
1238 pid_t tgid; 1243 pid_t tgid;
1239 1244
1245#ifdef CONFIG_CC_STACKPROTECTOR
1240 /* Canary value for the -fstack-protector gcc feature */ 1246 /* Canary value for the -fstack-protector gcc feature */
1241 unsigned long stack_canary; 1247 unsigned long stack_canary;
1248#endif
1242 1249
1243 /* 1250 /*
1244 * pointers to (original) parent process, youngest child, younger sibling, 1251 * pointers to (original) parent process, youngest child, younger sibling,
@@ -1840,11 +1847,12 @@ extern unsigned int sysctl_sched_min_granularity;
1840extern unsigned int sysctl_sched_wakeup_granularity; 1847extern unsigned int sysctl_sched_wakeup_granularity;
1841extern unsigned int sysctl_sched_shares_ratelimit; 1848extern unsigned int sysctl_sched_shares_ratelimit;
1842extern unsigned int sysctl_sched_shares_thresh; 1849extern unsigned int sysctl_sched_shares_thresh;
1843#ifdef CONFIG_SCHED_DEBUG
1844extern unsigned int sysctl_sched_child_runs_first; 1850extern unsigned int sysctl_sched_child_runs_first;
1851#ifdef CONFIG_SCHED_DEBUG
1845extern unsigned int sysctl_sched_features; 1852extern unsigned int sysctl_sched_features;
1846extern unsigned int sysctl_sched_migration_cost; 1853extern unsigned int sysctl_sched_migration_cost;
1847extern unsigned int sysctl_sched_nr_migrate; 1854extern unsigned int sysctl_sched_nr_migrate;
1855extern unsigned int sysctl_sched_time_avg;
1848extern unsigned int sysctl_timer_migration; 1856extern unsigned int sysctl_timer_migration;
1849 1857
1850int sched_nr_latency_handler(struct ctl_table *table, int write, 1858int sched_nr_latency_handler(struct ctl_table *table, int write,
@@ -2308,23 +2316,31 @@ static inline int need_resched(void)
2308 * cond_resched_softirq() will enable bhs before scheduling. 2316 * cond_resched_softirq() will enable bhs before scheduling.
2309 */ 2317 */
2310extern int _cond_resched(void); 2318extern int _cond_resched(void);
2311#ifdef CONFIG_PREEMPT_BKL 2319
2312static inline int cond_resched(void) 2320#define cond_resched() ({ \
2313{ 2321 __might_sleep(__FILE__, __LINE__, 0); \
2314 return 0; 2322 _cond_resched(); \
2315} 2323})
2324
2325extern int __cond_resched_lock(spinlock_t *lock);
2326
2327#ifdef CONFIG_PREEMPT
2328#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
2316#else 2329#else
2317static inline int cond_resched(void) 2330#define PREEMPT_LOCK_OFFSET 0
2318{
2319 return _cond_resched();
2320}
2321#endif 2331#endif
2322extern int cond_resched_lock(spinlock_t * lock); 2332
2323extern int cond_resched_softirq(void); 2333#define cond_resched_lock(lock) ({ \
2324static inline int cond_resched_bkl(void) 2334 __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
2325{ 2335 __cond_resched_lock(lock); \
2326 return _cond_resched(); 2336})
2327} 2337
2338extern int __cond_resched_softirq(void);
2339
2340#define cond_resched_softirq() ({ \
2341 __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \
2342 __cond_resched_softirq(); \
2343})
2328 2344
2329/* 2345/*
2330 * Does a critical section need to be broken due to another 2346 * Does a critical section need to be broken due to another
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7402c1a27c4f..85e8cf7d393c 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -85,20 +85,29 @@ int arch_update_cpu_topology(void);
85#define ARCH_HAS_SCHED_WAKE_IDLE 85#define ARCH_HAS_SCHED_WAKE_IDLE
86/* Common values for SMT siblings */ 86/* Common values for SMT siblings */
87#ifndef SD_SIBLING_INIT 87#ifndef SD_SIBLING_INIT
88#define SD_SIBLING_INIT (struct sched_domain) { \ 88#define SD_SIBLING_INIT (struct sched_domain) { \
89 .min_interval = 1, \ 89 .min_interval = 1, \
90 .max_interval = 2, \ 90 .max_interval = 2, \
91 .busy_factor = 64, \ 91 .busy_factor = 64, \
92 .imbalance_pct = 110, \ 92 .imbalance_pct = 110, \
93 .flags = SD_LOAD_BALANCE \ 93 \
94 | SD_BALANCE_NEWIDLE \ 94 .flags = 1*SD_LOAD_BALANCE \
95 | SD_BALANCE_FORK \ 95 | 1*SD_BALANCE_NEWIDLE \
96 | SD_BALANCE_EXEC \ 96 | 1*SD_BALANCE_EXEC \
97 | SD_WAKE_AFFINE \ 97 | 1*SD_BALANCE_FORK \
98 | SD_WAKE_BALANCE \ 98 | 0*SD_WAKE_IDLE \
99 | SD_SHARE_CPUPOWER, \ 99 | 1*SD_WAKE_AFFINE \
100 .last_balance = jiffies, \ 100 | 1*SD_WAKE_BALANCE \
101 .balance_interval = 1, \ 101 | 1*SD_SHARE_CPUPOWER \
102 | 0*SD_POWERSAVINGS_BALANCE \
103 | 0*SD_SHARE_PKG_RESOURCES \
104 | 0*SD_SERIALIZE \
105 | 0*SD_WAKE_IDLE_FAR \
106 | 0*SD_PREFER_SIBLING \
107 , \
108 .last_balance = jiffies, \
109 .balance_interval = 1, \
110 .smt_gain = 1178, /* 15% */ \
102} 111}
103#endif 112#endif
104#endif /* CONFIG_SCHED_SMT */ 113#endif /* CONFIG_SCHED_SMT */
@@ -106,69 +115,94 @@ int arch_update_cpu_topology(void);
106#ifdef CONFIG_SCHED_MC 115#ifdef CONFIG_SCHED_MC
107/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ 116/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
108#ifndef SD_MC_INIT 117#ifndef SD_MC_INIT
109#define SD_MC_INIT (struct sched_domain) { \ 118#define SD_MC_INIT (struct sched_domain) { \
110 .min_interval = 1, \ 119 .min_interval = 1, \
111 .max_interval = 4, \ 120 .max_interval = 4, \
112 .busy_factor = 64, \ 121 .busy_factor = 64, \
113 .imbalance_pct = 125, \ 122 .imbalance_pct = 125, \
114 .cache_nice_tries = 1, \ 123 .cache_nice_tries = 1, \
115 .busy_idx = 2, \ 124 .busy_idx = 2, \
116 .wake_idx = 1, \ 125 .wake_idx = 1, \
117 .forkexec_idx = 1, \ 126 .forkexec_idx = 1, \
118 .flags = SD_LOAD_BALANCE \ 127 \
119 | SD_BALANCE_FORK \ 128 .flags = 1*SD_LOAD_BALANCE \
120 | SD_BALANCE_EXEC \ 129 | 1*SD_BALANCE_NEWIDLE \
121 | SD_WAKE_AFFINE \ 130 | 1*SD_BALANCE_EXEC \
122 | SD_WAKE_BALANCE \ 131 | 1*SD_BALANCE_FORK \
123 | SD_SHARE_PKG_RESOURCES\ 132 | 1*SD_WAKE_IDLE \
124 | sd_balance_for_mc_power()\ 133 | 1*SD_WAKE_AFFINE \
125 | sd_power_saving_flags(),\ 134 | 1*SD_WAKE_BALANCE \
126 .last_balance = jiffies, \ 135 | 0*SD_SHARE_CPUPOWER \
127 .balance_interval = 1, \ 136 | 1*SD_SHARE_PKG_RESOURCES \
137 | 0*SD_SERIALIZE \
138 | 0*SD_WAKE_IDLE_FAR \
139 | sd_balance_for_mc_power() \
140 | sd_power_saving_flags() \
141 , \
142 .last_balance = jiffies, \
143 .balance_interval = 1, \
128} 144}
129#endif 145#endif
130#endif /* CONFIG_SCHED_MC */ 146#endif /* CONFIG_SCHED_MC */
131 147
132/* Common values for CPUs */ 148/* Common values for CPUs */
133#ifndef SD_CPU_INIT 149#ifndef SD_CPU_INIT
134#define SD_CPU_INIT (struct sched_domain) { \ 150#define SD_CPU_INIT (struct sched_domain) { \
135 .min_interval = 1, \ 151 .min_interval = 1, \
136 .max_interval = 4, \ 152 .max_interval = 4, \
137 .busy_factor = 64, \ 153 .busy_factor = 64, \
138 .imbalance_pct = 125, \ 154 .imbalance_pct = 125, \
139 .cache_nice_tries = 1, \ 155 .cache_nice_tries = 1, \
140 .busy_idx = 2, \ 156 .busy_idx = 2, \
141 .idle_idx = 1, \ 157 .idle_idx = 1, \
142 .newidle_idx = 2, \ 158 .newidle_idx = 2, \
143 .wake_idx = 1, \ 159 .wake_idx = 1, \
144 .forkexec_idx = 1, \ 160 .forkexec_idx = 1, \
145 .flags = SD_LOAD_BALANCE \ 161 \
146 | SD_BALANCE_EXEC \ 162 .flags = 1*SD_LOAD_BALANCE \
147 | SD_BALANCE_FORK \ 163 | 1*SD_BALANCE_NEWIDLE \
148 | SD_WAKE_AFFINE \ 164 | 1*SD_BALANCE_EXEC \
149 | SD_WAKE_BALANCE \ 165 | 1*SD_BALANCE_FORK \
150 | sd_balance_for_package_power()\ 166 | 1*SD_WAKE_IDLE \
151 | sd_power_saving_flags(),\ 167 | 0*SD_WAKE_AFFINE \
152 .last_balance = jiffies, \ 168 | 1*SD_WAKE_BALANCE \
153 .balance_interval = 1, \ 169 | 0*SD_SHARE_CPUPOWER \
170 | 0*SD_SHARE_PKG_RESOURCES \
171 | 0*SD_SERIALIZE \
172 | 0*SD_WAKE_IDLE_FAR \
173 | sd_balance_for_package_power() \
174 | sd_power_saving_flags() \
175 , \
176 .last_balance = jiffies, \
177 .balance_interval = 1, \
154} 178}
155#endif 179#endif
156 180
157/* sched_domains SD_ALLNODES_INIT for NUMA machines */ 181/* sched_domains SD_ALLNODES_INIT for NUMA machines */
158#define SD_ALLNODES_INIT (struct sched_domain) { \ 182#define SD_ALLNODES_INIT (struct sched_domain) { \
159 .min_interval = 64, \ 183 .min_interval = 64, \
160 .max_interval = 64*num_online_cpus(), \ 184 .max_interval = 64*num_online_cpus(), \
161 .busy_factor = 128, \ 185 .busy_factor = 128, \
162 .imbalance_pct = 133, \ 186 .imbalance_pct = 133, \
163 .cache_nice_tries = 1, \ 187 .cache_nice_tries = 1, \
164 .busy_idx = 3, \ 188 .busy_idx = 3, \
165 .idle_idx = 3, \ 189 .idle_idx = 3, \
166 .flags = SD_LOAD_BALANCE \ 190 .flags = 1*SD_LOAD_BALANCE \
167 | SD_BALANCE_NEWIDLE \ 191 | 1*SD_BALANCE_NEWIDLE \
168 | SD_WAKE_AFFINE \ 192 | 0*SD_BALANCE_EXEC \
169 | SD_SERIALIZE, \ 193 | 0*SD_BALANCE_FORK \
170 .last_balance = jiffies, \ 194 | 0*SD_WAKE_IDLE \
171 .balance_interval = 64, \ 195 | 1*SD_WAKE_AFFINE \
196 | 0*SD_WAKE_BALANCE \
197 | 0*SD_SHARE_CPUPOWER \
198 | 0*SD_POWERSAVINGS_BALANCE \
199 | 0*SD_SHARE_PKG_RESOURCES \
200 | 1*SD_SERIALIZE \
201 | 1*SD_WAKE_IDLE_FAR \
202 | 0*SD_PREFER_SIBLING \
203 , \
204 .last_balance = jiffies, \
205 .balance_interval = 64, \
172} 206}
173 207
174#ifdef CONFIG_NUMA 208#ifdef CONFIG_NUMA
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 8949bb7eb082..a4c369ec328f 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -340,6 +340,101 @@ TRACE_EVENT(sched_signal_send,
340 __entry->sig, __entry->comm, __entry->pid) 340 __entry->sig, __entry->comm, __entry->pid)
341); 341);
342 342
343/*
344 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
345 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
346 */
347
348/*
349 * Tracepoint for accounting wait time (time the task is runnable
350 * but not actually running due to scheduler contention).
351 */
352TRACE_EVENT(sched_stat_wait,
353
354 TP_PROTO(struct task_struct *tsk, u64 delay),
355
356 TP_ARGS(tsk, delay),
357
358 TP_STRUCT__entry(
359 __array( char, comm, TASK_COMM_LEN )
360 __field( pid_t, pid )
361 __field( u64, delay )
362 ),
363
364 TP_fast_assign(
365 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
366 __entry->pid = tsk->pid;
367 __entry->delay = delay;
368 )
369 TP_perf_assign(
370 __perf_count(delay);
371 ),
372
373 TP_printk("task: %s:%d wait: %Lu [ns]",
374 __entry->comm, __entry->pid,
375 (unsigned long long)__entry->delay)
376);
377
378/*
379 * Tracepoint for accounting sleep time (time the task is not runnable,
380 * including iowait, see below).
381 */
382TRACE_EVENT(sched_stat_sleep,
383
384 TP_PROTO(struct task_struct *tsk, u64 delay),
385
386 TP_ARGS(tsk, delay),
387
388 TP_STRUCT__entry(
389 __array( char, comm, TASK_COMM_LEN )
390 __field( pid_t, pid )
391 __field( u64, delay )
392 ),
393
394 TP_fast_assign(
395 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
396 __entry->pid = tsk->pid;
397 __entry->delay = delay;
398 )
399 TP_perf_assign(
400 __perf_count(delay);
401 ),
402
403 TP_printk("task: %s:%d sleep: %Lu [ns]",
404 __entry->comm, __entry->pid,
405 (unsigned long long)__entry->delay)
406);
407
408/*
409 * Tracepoint for accounting iowait time (time the task is not runnable
410 * due to waiting on IO to complete).
411 */
412TRACE_EVENT(sched_stat_iowait,
413
414 TP_PROTO(struct task_struct *tsk, u64 delay),
415
416 TP_ARGS(tsk, delay),
417
418 TP_STRUCT__entry(
419 __array( char, comm, TASK_COMM_LEN )
420 __field( pid_t, pid )
421 __field( u64, delay )
422 ),
423
424 TP_fast_assign(
425 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
426 __entry->pid = tsk->pid;
427 __entry->delay = delay;
428 )
429 TP_perf_assign(
430 __perf_count(delay);
431 ),
432
433 TP_printk("task: %s:%d iowait: %Lu [ns]",
434 __entry->comm, __entry->pid,
435 (unsigned long long)__entry->delay)
436);
437
343#endif /* _TRACE_SCHED_H */ 438#endif /* _TRACE_SCHED_H */
344 439
345/* This part must be outside protection */ 440/* This part must be outside protection */