aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/sched.c234
-rw-r--r--kernel/sched_stats.h235
2 files changed, 236 insertions, 233 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index ac4d26241d1e..f8b8eda4494d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -460,134 +460,6 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
460 spin_unlock_irqrestore(&rq->lock, *flags); 460 spin_unlock_irqrestore(&rq->lock, *flags);
461} 461}
462 462
463#ifdef CONFIG_SCHEDSTATS
464/*
465 * bump this up when changing the output format or the meaning of an existing
466 * format, so that tools can adapt (or abort)
467 */
468#define SCHEDSTAT_VERSION 14
469
470static int show_schedstat(struct seq_file *seq, void *v)
471{
472 int cpu;
473
474 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
475 seq_printf(seq, "timestamp %lu\n", jiffies);
476 for_each_online_cpu(cpu) {
477 struct rq *rq = cpu_rq(cpu);
478#ifdef CONFIG_SMP
479 struct sched_domain *sd;
480 int dcnt = 0;
481#endif
482
483 /* runqueue-specific stats */
484 seq_printf(seq,
485 "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
486 cpu, rq->yld_both_empty,
487 rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
488 rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
489 rq->ttwu_cnt, rq->ttwu_local,
490 rq->rq_sched_info.cpu_time,
491 rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
492
493 seq_printf(seq, "\n");
494
495#ifdef CONFIG_SMP
496 /* domain-specific stats */
497 preempt_disable();
498 for_each_domain(cpu, sd) {
499 enum cpu_idle_type itype;
500 char mask_str[NR_CPUS];
501
502 cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
503 seq_printf(seq, "domain%d %s", dcnt++, mask_str);
504 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
505 itype++) {
506 seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
507 "%lu",
508 sd->lb_cnt[itype],
509 sd->lb_balanced[itype],
510 sd->lb_failed[itype],
511 sd->lb_imbalance[itype],
512 sd->lb_gained[itype],
513 sd->lb_hot_gained[itype],
514 sd->lb_nobusyq[itype],
515 sd->lb_nobusyg[itype]);
516 }
517 seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
518 " %lu %lu %lu\n",
519 sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
520 sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
521 sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
522 sd->ttwu_wake_remote, sd->ttwu_move_affine,
523 sd->ttwu_move_balance);
524 }
525 preempt_enable();
526#endif
527 }
528 return 0;
529}
530
531static int schedstat_open(struct inode *inode, struct file *file)
532{
533 unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
534 char *buf = kmalloc(size, GFP_KERNEL);
535 struct seq_file *m;
536 int res;
537
538 if (!buf)
539 return -ENOMEM;
540 res = single_open(file, show_schedstat, NULL);
541 if (!res) {
542 m = file->private_data;
543 m->buf = buf;
544 m->size = size;
545 } else
546 kfree(buf);
547 return res;
548}
549
550const struct file_operations proc_schedstat_operations = {
551 .open = schedstat_open,
552 .read = seq_read,
553 .llseek = seq_lseek,
554 .release = single_release,
555};
556
557/*
558 * Expects runqueue lock to be held for atomicity of update
559 */
560static inline void
561rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
562{
563 if (rq) {
564 rq->rq_sched_info.run_delay += delta_jiffies;
565 rq->rq_sched_info.pcnt++;
566 }
567}
568
569/*
570 * Expects runqueue lock to be held for atomicity of update
571 */
572static inline void
573rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
574{
575 if (rq)
576 rq->rq_sched_info.cpu_time += delta_jiffies;
577}
578# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
579# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
580#else /* !CONFIG_SCHEDSTATS */
581static inline void
582rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
583{}
584static inline void
585rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
586{}
587# define schedstat_inc(rq, field) do { } while (0)
588# define schedstat_add(rq, field, amt) do { } while (0)
589#endif
590
591/* 463/*
592 * this_rq_lock - lock this runqueue and disable interrupts. 464 * this_rq_lock - lock this runqueue and disable interrupts.
593 */ 465 */
@@ -603,111 +475,7 @@ static inline struct rq *this_rq_lock(void)
603 return rq; 475 return rq;
604} 476}
605 477
606#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 478#include "sched_stats.h"
607/*
608 * Called when a process is dequeued from the active array and given
609 * the cpu. We should note that with the exception of interactive
610 * tasks, the expired queue will become the active queue after the active
611 * queue is empty, without explicitly dequeuing and requeuing tasks in the
612 * expired queue. (Interactive tasks may be requeued directly to the
613 * active queue, thus delaying tasks in the expired queue from running;
614 * see scheduler_tick()).
615 *
616 * This function is only called from sched_info_arrive(), rather than
617 * dequeue_task(). Even though a task may be queued and dequeued multiple
618 * times as it is shuffled about, we're really interested in knowing how
619 * long it was from the *first* time it was queued to the time that it
620 * finally hit a cpu.
621 */
622static inline void sched_info_dequeued(struct task_struct *t)
623{
624 t->sched_info.last_queued = 0;
625}
626
627/*
628 * Called when a task finally hits the cpu. We can now calculate how
629 * long it was waiting to run. We also note when it began so that we
630 * can keep stats on how long its timeslice is.
631 */
632static void sched_info_arrive(struct task_struct *t)
633{
634 unsigned long now = jiffies, delta_jiffies = 0;
635
636 if (t->sched_info.last_queued)
637 delta_jiffies = now - t->sched_info.last_queued;
638 sched_info_dequeued(t);
639 t->sched_info.run_delay += delta_jiffies;
640 t->sched_info.last_arrival = now;
641 t->sched_info.pcnt++;
642
643 rq_sched_info_arrive(task_rq(t), delta_jiffies);
644}
645
646/*
647 * Called when a process is queued into either the active or expired
648 * array. The time is noted and later used to determine how long we
649 * had to wait for us to reach the cpu. Since the expired queue will
650 * become the active queue after active queue is empty, without dequeuing
651 * and requeuing any tasks, we are interested in queuing to either. It
652 * is unusual but not impossible for tasks to be dequeued and immediately
653 * requeued in the same or another array: this can happen in sched_yield(),
654 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
655 * to runqueue.
656 *
657 * This function is only called from enqueue_task(), but also only updates
658 * the timestamp if it is already not set. It's assumed that
659 * sched_info_dequeued() will clear that stamp when appropriate.
660 */
661static inline void sched_info_queued(struct task_struct *t)
662{
663 if (unlikely(sched_info_on()))
664 if (!t->sched_info.last_queued)
665 t->sched_info.last_queued = jiffies;
666}
667
668/*
669 * Called when a process ceases being the active-running process, either
670 * voluntarily or involuntarily. Now we can calculate how long we ran.
671 */
672static inline void sched_info_depart(struct task_struct *t)
673{
674 unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
675
676 t->sched_info.cpu_time += delta_jiffies;
677 rq_sched_info_depart(task_rq(t), delta_jiffies);
678}
679
680/*
681 * Called when tasks are switched involuntarily due, typically, to expiring
682 * their time slice. (This may also be called when switching to or from
683 * the idle task.) We are only called when prev != next.
684 */
685static inline void
686__sched_info_switch(struct task_struct *prev, struct task_struct *next)
687{
688 struct rq *rq = task_rq(prev);
689
690 /*
691 * prev now departs the cpu. It's not interesting to record
692 * stats about how efficient we were at scheduling the idle
693 * process, however.
694 */
695 if (prev != rq->idle)
696 sched_info_depart(prev);
697
698 if (next != rq->idle)
699 sched_info_arrive(next);
700}
701static inline void
702sched_info_switch(struct task_struct *prev, struct task_struct *next)
703{
704 if (unlikely(sched_info_on()))
705 __sched_info_switch(prev, next);
706}
707#else
708#define sched_info_queued(t) do { } while (0)
709#define sched_info_switch(t, next) do { } while (0)
710#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
711 479
712/* 480/*
713 * Adding/removing a task to/from a priority array: 481 * Adding/removing a task to/from a priority array:
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
new file mode 100644
index 000000000000..cd82c6078904
--- /dev/null
+++ b/kernel/sched_stats.h
@@ -0,0 +1,235 @@
1
2#ifdef CONFIG_SCHEDSTATS
3/*
4 * bump this up when changing the output format or the meaning of an existing
5 * format, so that tools can adapt (or abort)
6 */
7#define SCHEDSTAT_VERSION 14
8
9static int show_schedstat(struct seq_file *seq, void *v)
10{
11 int cpu;
12
13 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
14 seq_printf(seq, "timestamp %lu\n", jiffies);
15 for_each_online_cpu(cpu) {
16 struct rq *rq = cpu_rq(cpu);
17#ifdef CONFIG_SMP
18 struct sched_domain *sd;
19 int dcnt = 0;
20#endif
21
22 /* runqueue-specific stats */
23 seq_printf(seq,
24 "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
25 cpu, rq->yld_both_empty,
26 rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
27 rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
28 rq->ttwu_cnt, rq->ttwu_local,
29 rq->rq_sched_info.cpu_time,
30 rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
31
32 seq_printf(seq, "\n");
33
34#ifdef CONFIG_SMP
35 /* domain-specific stats */
36 preempt_disable();
37 for_each_domain(cpu, sd) {
38 enum cpu_idle_type itype;
39 char mask_str[NR_CPUS];
40
41 cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
42 seq_printf(seq, "domain%d %s", dcnt++, mask_str);
43 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
44 itype++) {
45 seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu "
46 "%lu",
47 sd->lb_cnt[itype],
48 sd->lb_balanced[itype],
49 sd->lb_failed[itype],
50 sd->lb_imbalance[itype],
51 sd->lb_gained[itype],
52 sd->lb_hot_gained[itype],
53 sd->lb_nobusyq[itype],
54 sd->lb_nobusyg[itype]);
55 }
56 seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu"
57 " %lu %lu %lu\n",
58 sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
59 sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed,
60 sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed,
61 sd->ttwu_wake_remote, sd->ttwu_move_affine,
62 sd->ttwu_move_balance);
63 }
64 preempt_enable();
65#endif
66 }
67 return 0;
68}
69
70static int schedstat_open(struct inode *inode, struct file *file)
71{
72 unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
73 char *buf = kmalloc(size, GFP_KERNEL);
74 struct seq_file *m;
75 int res;
76
77 if (!buf)
78 return -ENOMEM;
79 res = single_open(file, show_schedstat, NULL);
80 if (!res) {
81 m = file->private_data;
82 m->buf = buf;
83 m->size = size;
84 } else
85 kfree(buf);
86 return res;
87}
88
89const struct file_operations proc_schedstat_operations = {
90 .open = schedstat_open,
91 .read = seq_read,
92 .llseek = seq_lseek,
93 .release = single_release,
94};
95
96/*
97 * Expects runqueue lock to be held for atomicity of update
98 */
99static inline void
100rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
101{
102 if (rq) {
103 rq->rq_sched_info.run_delay += delta;
104 rq->rq_sched_info.pcnt++;
105 }
106}
107
108/*
109 * Expects runqueue lock to be held for atomicity of update
110 */
111static inline void
112rq_sched_info_depart(struct rq *rq, unsigned long long delta)
113{
114 if (rq)
115 rq->rq_sched_info.cpu_time += delta;
116}
117# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
118# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
119#else /* !CONFIG_SCHEDSTATS */
120static inline void
121rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
122{}
123static inline void
124rq_sched_info_depart(struct rq *rq, unsigned long long delta)
125{}
126# define schedstat_inc(rq, field) do { } while (0)
127# define schedstat_add(rq, field, amt) do { } while (0)
128#endif
129
130#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
131/*
132 * Called when a process is dequeued from the active array and given
133 * the cpu. We should note that with the exception of interactive
134 * tasks, the expired queue will become the active queue after the active
135 * queue is empty, without explicitly dequeuing and requeuing tasks in the
136 * expired queue. (Interactive tasks may be requeued directly to the
137 * active queue, thus delaying tasks in the expired queue from running;
138 * see scheduler_tick()).
139 *
140 * This function is only called from sched_info_arrive(), rather than
141 * dequeue_task(). Even though a task may be queued and dequeued multiple
142 * times as it is shuffled about, we're really interested in knowing how
143 * long it was from the *first* time it was queued to the time that it
144 * finally hit a cpu.
145 */
146static inline void sched_info_dequeued(struct task_struct *t)
147{
148 t->sched_info.last_queued = 0;
149}
150
151/*
152 * Called when a task finally hits the cpu. We can now calculate how
153 * long it was waiting to run. We also note when it began so that we
154 * can keep stats on how long its timeslice is.
155 */
156static void sched_info_arrive(struct task_struct *t)
157{
158 unsigned long long now = sched_clock(), delta = 0;
159
160 if (t->sched_info.last_queued)
161 delta = now - t->sched_info.last_queued;
162 sched_info_dequeued(t);
163 t->sched_info.run_delay += delta;
164 t->sched_info.last_arrival = now;
165 t->sched_info.pcnt++;
166
167 rq_sched_info_arrive(task_rq(t), delta);
168}
169
170/*
171 * Called when a process is queued into either the active or expired
172 * array. The time is noted and later used to determine how long we
173 * had to wait for us to reach the cpu. Since the expired queue will
174 * become the active queue after active queue is empty, without dequeuing
175 * and requeuing any tasks, we are interested in queuing to either. It
176 * is unusual but not impossible for tasks to be dequeued and immediately
177 * requeued in the same or another array: this can happen in sched_yield(),
178 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
179 * to runqueue.
180 *
181 * This function is only called from enqueue_task(), but also only updates
182 * the timestamp if it is already not set. It's assumed that
183 * sched_info_dequeued() will clear that stamp when appropriate.
184 */
185static inline void sched_info_queued(struct task_struct *t)
186{
187 if (unlikely(sched_info_on()))
188 if (!t->sched_info.last_queued)
189 t->sched_info.last_queued = sched_clock();
190}
191
192/*
193 * Called when a process ceases being the active-running process, either
194 * voluntarily or involuntarily. Now we can calculate how long we ran.
195 */
196static inline void sched_info_depart(struct task_struct *t)
197{
198 unsigned long long delta = sched_clock() - t->sched_info.last_arrival;
199
200 t->sched_info.cpu_time += delta;
201 rq_sched_info_depart(task_rq(t), delta);
202}
203
204/*
205 * Called when tasks are switched involuntarily due, typically, to expiring
206 * their time slice. (This may also be called when switching to or from
207 * the idle task.) We are only called when prev != next.
208 */
209static inline void
210__sched_info_switch(struct task_struct *prev, struct task_struct *next)
211{
212 struct rq *rq = task_rq(prev);
213
214 /*
215 * prev now departs the cpu. It's not interesting to record
216 * stats about how efficient we were at scheduling the idle
217 * process, however.
218 */
219 if (prev != rq->idle)
220 sched_info_depart(prev);
221
222 if (next != rq->idle)
223 sched_info_arrive(next);
224}
225static inline void
226sched_info_switch(struct task_struct *prev, struct task_struct *next)
227{
228 if (unlikely(sched_info_on()))
229 __sched_info_switch(prev, next);
230}
231#else
232#define sched_info_queued(t) do { } while (0)
233#define sched_info_switch(t, next) do { } while (0)
234#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
235