diff options
Diffstat (limited to 'kernel/sched_stats.h')
-rw-r--r-- | kernel/sched_stats.h | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h new file mode 100644 index 000000000000..cd82c6078904 --- /dev/null +++ b/kernel/sched_stats.h | |||
@@ -0,0 +1,235 @@ | |||
1 | |||
2 | #ifdef CONFIG_SCHEDSTATS | ||
/*
 * Version number of the schedstat output.  Bump it whenever the output
 * format changes, or whenever the meaning of an existing field changes,
 * so that tools can adapt (or abort).
 */
#define SCHEDSTAT_VERSION	14
8 | |||
9 | static int show_schedstat(struct seq_file *seq, void *v) | ||
10 | { | ||
11 | int cpu; | ||
12 | |||
13 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
14 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
15 | for_each_online_cpu(cpu) { | ||
16 | struct rq *rq = cpu_rq(cpu); | ||
17 | #ifdef CONFIG_SMP | ||
18 | struct sched_domain *sd; | ||
19 | int dcnt = 0; | ||
20 | #endif | ||
21 | |||
22 | /* runqueue-specific stats */ | ||
23 | seq_printf(seq, | ||
24 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | ||
25 | cpu, rq->yld_both_empty, | ||
26 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | ||
27 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | ||
28 | rq->ttwu_cnt, rq->ttwu_local, | ||
29 | rq->rq_sched_info.cpu_time, | ||
30 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | ||
31 | |||
32 | seq_printf(seq, "\n"); | ||
33 | |||
34 | #ifdef CONFIG_SMP | ||
35 | /* domain-specific stats */ | ||
36 | preempt_disable(); | ||
37 | for_each_domain(cpu, sd) { | ||
38 | enum cpu_idle_type itype; | ||
39 | char mask_str[NR_CPUS]; | ||
40 | |||
41 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | ||
42 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | ||
43 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
44 | itype++) { | ||
45 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " | ||
46 | "%lu", | ||
47 | sd->lb_cnt[itype], | ||
48 | sd->lb_balanced[itype], | ||
49 | sd->lb_failed[itype], | ||
50 | sd->lb_imbalance[itype], | ||
51 | sd->lb_gained[itype], | ||
52 | sd->lb_hot_gained[itype], | ||
53 | sd->lb_nobusyq[itype], | ||
54 | sd->lb_nobusyg[itype]); | ||
55 | } | ||
56 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" | ||
57 | " %lu %lu %lu\n", | ||
58 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | ||
59 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | ||
60 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | ||
61 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
62 | sd->ttwu_move_balance); | ||
63 | } | ||
64 | preempt_enable(); | ||
65 | #endif | ||
66 | } | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int schedstat_open(struct inode *inode, struct file *file) | ||
71 | { | ||
72 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
73 | char *buf = kmalloc(size, GFP_KERNEL); | ||
74 | struct seq_file *m; | ||
75 | int res; | ||
76 | |||
77 | if (!buf) | ||
78 | return -ENOMEM; | ||
79 | res = single_open(file, show_schedstat, NULL); | ||
80 | if (!res) { | ||
81 | m = file->private_data; | ||
82 | m->buf = buf; | ||
83 | m->size = size; | ||
84 | } else | ||
85 | kfree(buf); | ||
86 | return res; | ||
87 | } | ||
88 | |||
89 | const struct file_operations proc_schedstat_operations = { | ||
90 | .open = schedstat_open, | ||
91 | .read = seq_read, | ||
92 | .llseek = seq_lseek, | ||
93 | .release = single_release, | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * Expects runqueue lock to be held for atomicity of update | ||
98 | */ | ||
99 | static inline void | ||
100 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | ||
101 | { | ||
102 | if (rq) { | ||
103 | rq->rq_sched_info.run_delay += delta; | ||
104 | rq->rq_sched_info.pcnt++; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Expects runqueue lock to be held for atomicity of update | ||
110 | */ | ||
111 | static inline void | ||
112 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | ||
113 | { | ||
114 | if (rq) | ||
115 | rq->rq_sched_info.cpu_time += delta; | ||
116 | } | ||
/* Statistics helpers: increment, or add @amt to, the counter @field of @rq. */
#define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
#define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
119 | #else /* !CONFIG_SCHEDSTATS */ | ||
/* Schedstats are compiled out: arrival accounting does nothing. */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
{
}
/* Schedstats are compiled out: departure accounting does nothing. */
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{
}
/* Schedstats are compiled out: the counter helpers expand to nothing. */
#define schedstat_inc(rq, field)	do { } while (0)
#define schedstat_add(rq, field, amt)	do { } while (0)
128 | #endif | ||
129 | |||
130 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | ||
131 | /* | ||
132 | * Called when a process is dequeued from the active array and given | ||
133 | * the cpu. We should note that with the exception of interactive | ||
134 | * tasks, the expired queue will become the active queue after the active | ||
135 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
136 | * expired queue. (Interactive tasks may be requeued directly to the | ||
137 | * active queue, thus delaying tasks in the expired queue from running; | ||
138 | * see scheduler_tick()). | ||
139 | * | ||
140 | * This function is only called from sched_info_arrive(), rather than | ||
141 | * dequeue_task(). Even though a task may be queued and dequeued multiple | ||
142 | * times as it is shuffled about, we're really interested in knowing how | ||
143 | * long it was from the *first* time it was queued to the time that it | ||
144 | * finally hit a cpu. | ||
145 | */ | ||
146 | static inline void sched_info_dequeued(struct task_struct *t) | ||
147 | { | ||
148 | t->sched_info.last_queued = 0; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * Called when a task finally hits the cpu. We can now calculate how | ||
153 | * long it was waiting to run. We also note when it began so that we | ||
154 | * can keep stats on how long its timeslice is. | ||
155 | */ | ||
156 | static void sched_info_arrive(struct task_struct *t) | ||
157 | { | ||
158 | unsigned long long now = sched_clock(), delta = 0; | ||
159 | |||
160 | if (t->sched_info.last_queued) | ||
161 | delta = now - t->sched_info.last_queued; | ||
162 | sched_info_dequeued(t); | ||
163 | t->sched_info.run_delay += delta; | ||
164 | t->sched_info.last_arrival = now; | ||
165 | t->sched_info.pcnt++; | ||
166 | |||
167 | rq_sched_info_arrive(task_rq(t), delta); | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Called when a process is queued into either the active or expired | ||
172 | * array. The time is noted and later used to determine how long we | ||
173 | * had to wait for us to reach the cpu. Since the expired queue will | ||
174 | * become the active queue after active queue is empty, without dequeuing | ||
175 | * and requeuing any tasks, we are interested in queuing to either. It | ||
176 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
177 | * requeued in the same or another array: this can happen in sched_yield(), | ||
178 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
179 | * to runqueue. | ||
180 | * | ||
181 | * This function is only called from enqueue_task(), but also only updates | ||
182 | * the timestamp if it is already not set. It's assumed that | ||
183 | * sched_info_dequeued() will clear that stamp when appropriate. | ||
184 | */ | ||
185 | static inline void sched_info_queued(struct task_struct *t) | ||
186 | { | ||
187 | if (unlikely(sched_info_on())) | ||
188 | if (!t->sched_info.last_queued) | ||
189 | t->sched_info.last_queued = sched_clock(); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Called when a process ceases being the active-running process, either | ||
194 | * voluntarily or involuntarily. Now we can calculate how long we ran. | ||
195 | */ | ||
196 | static inline void sched_info_depart(struct task_struct *t) | ||
197 | { | ||
198 | unsigned long long delta = sched_clock() - t->sched_info.last_arrival; | ||
199 | |||
200 | t->sched_info.cpu_time += delta; | ||
201 | rq_sched_info_depart(task_rq(t), delta); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * Called when tasks are switched involuntarily due, typically, to expiring | ||
206 | * their time slice. (This may also be called when switching to or from | ||
207 | * the idle task.) We are only called when prev != next. | ||
208 | */ | ||
209 | static inline void | ||
210 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
211 | { | ||
212 | struct rq *rq = task_rq(prev); | ||
213 | |||
214 | /* | ||
215 | * prev now departs the cpu. It's not interesting to record | ||
216 | * stats about how efficient we were at scheduling the idle | ||
217 | * process, however. | ||
218 | */ | ||
219 | if (prev != rq->idle) | ||
220 | sched_info_depart(prev); | ||
221 | |||
222 | if (next != rq->idle) | ||
223 | sched_info_arrive(next); | ||
224 | } | ||
/*
 * Context-switch accounting hook: forwards to __sched_info_switch() only
 * when sched_info_on() reports that accounting is enabled.
 */
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
	if (!unlikely(sched_info_on()))
		return;

	__sched_info_switch(prev, next);
}
231 | #else | ||
/* Neither schedstats nor delay accounting is built in: hooks are no-ops. */
#define sched_info_queued(t)		do { } while (0)
#define sched_info_switch(t, next)	do { } while (0)
234 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | ||
235 | |||