diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 234 | ||||
-rw-r--r-- | kernel/sched_stats.h | 235 |
2 files changed, 236 insertions, 233 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index ac4d26241d1e..f8b8eda4494d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -460,134 +460,6 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | |||
460 | spin_unlock_irqrestore(&rq->lock, *flags); | 460 | spin_unlock_irqrestore(&rq->lock, *flags); |
461 | } | 461 | } |
462 | 462 | ||
463 | #ifdef CONFIG_SCHEDSTATS | ||
464 | /* | ||
465 | * bump this up when changing the output format or the meaning of an existing | ||
466 | * format, so that tools can adapt (or abort) | ||
467 | */ | ||
468 | #define SCHEDSTAT_VERSION 14 | ||
469 | |||
470 | static int show_schedstat(struct seq_file *seq, void *v) | ||
471 | { | ||
472 | int cpu; | ||
473 | |||
474 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
475 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
476 | for_each_online_cpu(cpu) { | ||
477 | struct rq *rq = cpu_rq(cpu); | ||
478 | #ifdef CONFIG_SMP | ||
479 | struct sched_domain *sd; | ||
480 | int dcnt = 0; | ||
481 | #endif | ||
482 | |||
483 | /* runqueue-specific stats */ | ||
484 | seq_printf(seq, | ||
485 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | ||
486 | cpu, rq->yld_both_empty, | ||
487 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | ||
488 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | ||
489 | rq->ttwu_cnt, rq->ttwu_local, | ||
490 | rq->rq_sched_info.cpu_time, | ||
491 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | ||
492 | |||
493 | seq_printf(seq, "\n"); | ||
494 | |||
495 | #ifdef CONFIG_SMP | ||
496 | /* domain-specific stats */ | ||
497 | preempt_disable(); | ||
498 | for_each_domain(cpu, sd) { | ||
499 | enum cpu_idle_type itype; | ||
500 | char mask_str[NR_CPUS]; | ||
501 | |||
502 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | ||
503 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | ||
504 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
505 | itype++) { | ||
506 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " | ||
507 | "%lu", | ||
508 | sd->lb_cnt[itype], | ||
509 | sd->lb_balanced[itype], | ||
510 | sd->lb_failed[itype], | ||
511 | sd->lb_imbalance[itype], | ||
512 | sd->lb_gained[itype], | ||
513 | sd->lb_hot_gained[itype], | ||
514 | sd->lb_nobusyq[itype], | ||
515 | sd->lb_nobusyg[itype]); | ||
516 | } | ||
517 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" | ||
518 | " %lu %lu %lu\n", | ||
519 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | ||
520 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | ||
521 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | ||
522 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
523 | sd->ttwu_move_balance); | ||
524 | } | ||
525 | preempt_enable(); | ||
526 | #endif | ||
527 | } | ||
528 | return 0; | ||
529 | } | ||
530 | |||
531 | static int schedstat_open(struct inode *inode, struct file *file) | ||
532 | { | ||
533 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
534 | char *buf = kmalloc(size, GFP_KERNEL); | ||
535 | struct seq_file *m; | ||
536 | int res; | ||
537 | |||
538 | if (!buf) | ||
539 | return -ENOMEM; | ||
540 | res = single_open(file, show_schedstat, NULL); | ||
541 | if (!res) { | ||
542 | m = file->private_data; | ||
543 | m->buf = buf; | ||
544 | m->size = size; | ||
545 | } else | ||
546 | kfree(buf); | ||
547 | return res; | ||
548 | } | ||
549 | |||
550 | const struct file_operations proc_schedstat_operations = { | ||
551 | .open = schedstat_open, | ||
552 | .read = seq_read, | ||
553 | .llseek = seq_lseek, | ||
554 | .release = single_release, | ||
555 | }; | ||
556 | |||
557 | /* | ||
558 | * Expects runqueue lock to be held for atomicity of update | ||
559 | */ | ||
560 | static inline void | ||
561 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
562 | { | ||
563 | if (rq) { | ||
564 | rq->rq_sched_info.run_delay += delta_jiffies; | ||
565 | rq->rq_sched_info.pcnt++; | ||
566 | } | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Expects runqueue lock to be held for atomicity of update | ||
571 | */ | ||
572 | static inline void | ||
573 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
574 | { | ||
575 | if (rq) | ||
576 | rq->rq_sched_info.cpu_time += delta_jiffies; | ||
577 | } | ||
578 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | ||
579 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | ||
580 | #else /* !CONFIG_SCHEDSTATS */ | ||
581 | static inline void | ||
582 | rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies) | ||
583 | {} | ||
584 | static inline void | ||
585 | rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies) | ||
586 | {} | ||
587 | # define schedstat_inc(rq, field) do { } while (0) | ||
588 | # define schedstat_add(rq, field, amt) do { } while (0) | ||
589 | #endif | ||
590 | |||
591 | /* | 463 | /* |
592 | * this_rq_lock - lock this runqueue and disable interrupts. | 464 | * this_rq_lock - lock this runqueue and disable interrupts. |
593 | */ | 465 | */ |
@@ -603,111 +475,7 @@ static inline struct rq *this_rq_lock(void) | |||
603 | return rq; | 475 | return rq; |
604 | } | 476 | } |
605 | 477 | ||
606 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 478 | #include "sched_stats.h" |
607 | /* | ||
608 | * Called when a process is dequeued from the active array and given | ||
609 | * the cpu. We should note that with the exception of interactive | ||
610 | * tasks, the expired queue will become the active queue after the active | ||
611 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
612 | * expired queue. (Interactive tasks may be requeued directly to the | ||
613 | * active queue, thus delaying tasks in the expired queue from running; | ||
614 | * see scheduler_tick()). | ||
615 | * | ||
616 | * This function is only called from sched_info_arrive(), rather than | ||
617 | * dequeue_task(). Even though a task may be queued and dequeued multiple | ||
618 | * times as it is shuffled about, we're really interested in knowing how | ||
619 | * long it was from the *first* time it was queued to the time that it | ||
620 | * finally hit a cpu. | ||
621 | */ | ||
622 | static inline void sched_info_dequeued(struct task_struct *t) | ||
623 | { | ||
624 | t->sched_info.last_queued = 0; | ||
625 | } | ||
626 | |||
627 | /* | ||
628 | * Called when a task finally hits the cpu. We can now calculate how | ||
629 | * long it was waiting to run. We also note when it began so that we | ||
630 | * can keep stats on how long its timeslice is. | ||
631 | */ | ||
632 | static void sched_info_arrive(struct task_struct *t) | ||
633 | { | ||
634 | unsigned long now = jiffies, delta_jiffies = 0; | ||
635 | |||
636 | if (t->sched_info.last_queued) | ||
637 | delta_jiffies = now - t->sched_info.last_queued; | ||
638 | sched_info_dequeued(t); | ||
639 | t->sched_info.run_delay += delta_jiffies; | ||
640 | t->sched_info.last_arrival = now; | ||
641 | t->sched_info.pcnt++; | ||
642 | |||
643 | rq_sched_info_arrive(task_rq(t), delta_jiffies); | ||
644 | } | ||
645 | |||
646 | /* | ||
647 | * Called when a process is queued into either the active or expired | ||
648 | * array. The time is noted and later used to determine how long we | ||
649 | * had to wait for us to reach the cpu. Since the expired queue will | ||
650 | * become the active queue after active queue is empty, without dequeuing | ||
651 | * and requeuing any tasks, we are interested in queuing to either. It | ||
652 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
653 | * requeued in the same or another array: this can happen in sched_yield(), | ||
654 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
655 | * to runqueue. | ||
656 | * | ||
657 | * This function is only called from enqueue_task(), but also only updates | ||
658 | * the timestamp if it is not already set. It's assumed that | ||
659 | * sched_info_dequeued() will clear that stamp when appropriate. | ||
660 | */ | ||
661 | static inline void sched_info_queued(struct task_struct *t) | ||
662 | { | ||
663 | if (unlikely(sched_info_on())) | ||
664 | if (!t->sched_info.last_queued) | ||
665 | t->sched_info.last_queued = jiffies; | ||
666 | } | ||
667 | |||
668 | /* | ||
669 | * Called when a process ceases being the active-running process, either | ||
670 | * voluntarily or involuntarily. Now we can calculate how long we ran. | ||
671 | */ | ||
672 | static inline void sched_info_depart(struct task_struct *t) | ||
673 | { | ||
674 | unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival; | ||
675 | |||
676 | t->sched_info.cpu_time += delta_jiffies; | ||
677 | rq_sched_info_depart(task_rq(t), delta_jiffies); | ||
678 | } | ||
679 | |||
680 | /* | ||
681 | * Called when tasks are switched involuntarily due, typically, to expiring | ||
682 | * their time slice. (This may also be called when switching to or from | ||
683 | * the idle task.) We are only called when prev != next. | ||
684 | */ | ||
685 | static inline void | ||
686 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
687 | { | ||
688 | struct rq *rq = task_rq(prev); | ||
689 | |||
690 | /* | ||
691 | * prev now departs the cpu. It's not interesting to record | ||
692 | * stats about how efficient we were at scheduling the idle | ||
693 | * process, however. | ||
694 | */ | ||
695 | if (prev != rq->idle) | ||
696 | sched_info_depart(prev); | ||
697 | |||
698 | if (next != rq->idle) | ||
699 | sched_info_arrive(next); | ||
700 | } | ||
701 | static inline void | ||
702 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
703 | { | ||
704 | if (unlikely(sched_info_on())) | ||
705 | __sched_info_switch(prev, next); | ||
706 | } | ||
707 | #else | ||
708 | #define sched_info_queued(t) do { } while (0) | ||
709 | #define sched_info_switch(t, next) do { } while (0) | ||
710 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | ||
711 | 479 | ||
712 | /* | 480 | /* |
713 | * Adding/removing a task to/from a priority array: | 481 | * Adding/removing a task to/from a priority array: |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h new file mode 100644 index 000000000000..cd82c6078904 --- /dev/null +++ b/kernel/sched_stats.h | |||
@@ -0,0 +1,235 @@ | |||
1 | |||
2 | #ifdef CONFIG_SCHEDSTATS | ||
3 | /* | ||
4 | * bump this up when changing the output format or the meaning of an existing | ||
5 | * format, so that tools can adapt (or abort) | ||
6 | */ | ||
7 | #define SCHEDSTAT_VERSION 14 | ||
8 | |||
9 | static int show_schedstat(struct seq_file *seq, void *v) | ||
10 | { | ||
11 | int cpu; | ||
12 | |||
13 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
14 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
15 | for_each_online_cpu(cpu) { | ||
16 | struct rq *rq = cpu_rq(cpu); | ||
17 | #ifdef CONFIG_SMP | ||
18 | struct sched_domain *sd; | ||
19 | int dcnt = 0; | ||
20 | #endif | ||
21 | |||
22 | /* runqueue-specific stats */ | ||
23 | seq_printf(seq, | ||
24 | "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | ||
25 | cpu, rq->yld_both_empty, | ||
26 | rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt, | ||
27 | rq->sched_switch, rq->sched_cnt, rq->sched_goidle, | ||
28 | rq->ttwu_cnt, rq->ttwu_local, | ||
29 | rq->rq_sched_info.cpu_time, | ||
30 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt); | ||
31 | |||
32 | seq_printf(seq, "\n"); | ||
33 | |||
34 | #ifdef CONFIG_SMP | ||
35 | /* domain-specific stats */ | ||
36 | preempt_disable(); | ||
37 | for_each_domain(cpu, sd) { | ||
38 | enum cpu_idle_type itype; | ||
39 | char mask_str[NR_CPUS]; | ||
40 | |||
41 | cpumask_scnprintf(mask_str, NR_CPUS, sd->span); | ||
42 | seq_printf(seq, "domain%d %s", dcnt++, mask_str); | ||
43 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
44 | itype++) { | ||
45 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu " | ||
46 | "%lu", | ||
47 | sd->lb_cnt[itype], | ||
48 | sd->lb_balanced[itype], | ||
49 | sd->lb_failed[itype], | ||
50 | sd->lb_imbalance[itype], | ||
51 | sd->lb_gained[itype], | ||
52 | sd->lb_hot_gained[itype], | ||
53 | sd->lb_nobusyq[itype], | ||
54 | sd->lb_nobusyg[itype]); | ||
55 | } | ||
56 | seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu" | ||
57 | " %lu %lu %lu\n", | ||
58 | sd->alb_cnt, sd->alb_failed, sd->alb_pushed, | ||
59 | sd->sbe_cnt, sd->sbe_balanced, sd->sbe_pushed, | ||
60 | sd->sbf_cnt, sd->sbf_balanced, sd->sbf_pushed, | ||
61 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
62 | sd->ttwu_move_balance); | ||
63 | } | ||
64 | preempt_enable(); | ||
65 | #endif | ||
66 | } | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int schedstat_open(struct inode *inode, struct file *file) | ||
71 | { | ||
72 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
73 | char *buf = kmalloc(size, GFP_KERNEL); | ||
74 | struct seq_file *m; | ||
75 | int res; | ||
76 | |||
77 | if (!buf) | ||
78 | return -ENOMEM; | ||
79 | res = single_open(file, show_schedstat, NULL); | ||
80 | if (!res) { | ||
81 | m = file->private_data; | ||
82 | m->buf = buf; | ||
83 | m->size = size; | ||
84 | } else | ||
85 | kfree(buf); | ||
86 | return res; | ||
87 | } | ||
88 | |||
89 | const struct file_operations proc_schedstat_operations = { | ||
90 | .open = schedstat_open, | ||
91 | .read = seq_read, | ||
92 | .llseek = seq_lseek, | ||
93 | .release = single_release, | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * Expects runqueue lock to be held for atomicity of update | ||
98 | */ | ||
99 | static inline void | ||
100 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | ||
101 | { | ||
102 | if (rq) { | ||
103 | rq->rq_sched_info.run_delay += delta; | ||
104 | rq->rq_sched_info.pcnt++; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Expects runqueue lock to be held for atomicity of update | ||
110 | */ | ||
111 | static inline void | ||
112 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | ||
113 | { | ||
114 | if (rq) | ||
115 | rq->rq_sched_info.cpu_time += delta; | ||
116 | } | ||
117 | # define schedstat_inc(rq, field) do { (rq)->field++; } while (0) | ||
118 | # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0) | ||
119 | #else /* !CONFIG_SCHEDSTATS */ | ||
120 | static inline void | ||
121 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | ||
122 | {} | ||
123 | static inline void | ||
124 | rq_sched_info_depart(struct rq *rq, unsigned long long delta) | ||
125 | {} | ||
126 | # define schedstat_inc(rq, field) do { } while (0) | ||
127 | # define schedstat_add(rq, field, amt) do { } while (0) | ||
128 | #endif | ||
129 | |||
130 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | ||
131 | /* | ||
132 | * Called when a process is dequeued from the active array and given | ||
133 | * the cpu. We should note that with the exception of interactive | ||
134 | * tasks, the expired queue will become the active queue after the active | ||
135 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
136 | * expired queue. (Interactive tasks may be requeued directly to the | ||
137 | * active queue, thus delaying tasks in the expired queue from running; | ||
138 | * see scheduler_tick()). | ||
139 | * | ||
140 | * This function is only called from sched_info_arrive(), rather than | ||
141 | * dequeue_task(). Even though a task may be queued and dequeued multiple | ||
142 | * times as it is shuffled about, we're really interested in knowing how | ||
143 | * long it was from the *first* time it was queued to the time that it | ||
144 | * finally hit a cpu. | ||
145 | */ | ||
146 | static inline void sched_info_dequeued(struct task_struct *t) | ||
147 | { | ||
148 | t->sched_info.last_queued = 0; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * Called when a task finally hits the cpu. We can now calculate how | ||
153 | * long it was waiting to run. We also note when it began so that we | ||
154 | * can keep stats on how long its timeslice is. | ||
155 | */ | ||
156 | static void sched_info_arrive(struct task_struct *t) | ||
157 | { | ||
158 | unsigned long long now = sched_clock(), delta = 0; | ||
159 | |||
160 | if (t->sched_info.last_queued) | ||
161 | delta = now - t->sched_info.last_queued; | ||
162 | sched_info_dequeued(t); | ||
163 | t->sched_info.run_delay += delta; | ||
164 | t->sched_info.last_arrival = now; | ||
165 | t->sched_info.pcnt++; | ||
166 | |||
167 | rq_sched_info_arrive(task_rq(t), delta); | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Called when a process is queued into either the active or expired | ||
172 | * array. The time is noted and later used to determine how long we | ||
173 | * had to wait for us to reach the cpu. Since the expired queue will | ||
174 | * become the active queue after active queue is empty, without dequeuing | ||
175 | * and requeuing any tasks, we are interested in queuing to either. It | ||
176 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
177 | * requeued in the same or another array: this can happen in sched_yield(), | ||
178 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
179 | * to runqueue. | ||
180 | * | ||
181 | * This function is only called from enqueue_task(), but also only updates | ||
182 | * the timestamp if it is not already set. It's assumed that | ||
183 | * sched_info_dequeued() will clear that stamp when appropriate. | ||
184 | */ | ||
185 | static inline void sched_info_queued(struct task_struct *t) | ||
186 | { | ||
187 | if (unlikely(sched_info_on())) | ||
188 | if (!t->sched_info.last_queued) | ||
189 | t->sched_info.last_queued = sched_clock(); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Called when a process ceases being the active-running process, either | ||
194 | * voluntarily or involuntarily. Now we can calculate how long we ran. | ||
195 | */ | ||
196 | static inline void sched_info_depart(struct task_struct *t) | ||
197 | { | ||
198 | unsigned long long delta = sched_clock() - t->sched_info.last_arrival; | ||
199 | |||
200 | t->sched_info.cpu_time += delta; | ||
201 | rq_sched_info_depart(task_rq(t), delta); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * Called when tasks are switched involuntarily due, typically, to expiring | ||
206 | * their time slice. (This may also be called when switching to or from | ||
207 | * the idle task.) We are only called when prev != next. | ||
208 | */ | ||
209 | static inline void | ||
210 | __sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
211 | { | ||
212 | struct rq *rq = task_rq(prev); | ||
213 | |||
214 | /* | ||
215 | * prev now departs the cpu. It's not interesting to record | ||
216 | * stats about how efficient we were at scheduling the idle | ||
217 | * process, however. | ||
218 | */ | ||
219 | if (prev != rq->idle) | ||
220 | sched_info_depart(prev); | ||
221 | |||
222 | if (next != rq->idle) | ||
223 | sched_info_arrive(next); | ||
224 | } | ||
225 | static inline void | ||
226 | sched_info_switch(struct task_struct *prev, struct task_struct *next) | ||
227 | { | ||
228 | if (unlikely(sched_info_on())) | ||
229 | __sched_info_switch(prev, next); | ||
230 | } | ||
231 | #else | ||
232 | #define sched_info_queued(t) do { } while (0) | ||
233 | #define sched_info_switch(t, next) do { } while (0) | ||
234 | #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ | ||
235 | |||