aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-07-04 11:56:53 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-07-04 11:56:53 -0400
commit 22a093b2fb52fb656658a32adc80c24ddc200ca4 (patch)
tree d0edd1065ee10369ca204404e43bdeece71d3d5e
parent c1776a18e3b5a3559f3dff5df0ecce570abd3a9f (diff)
parent 397f2378f136128623fc237746157aa2564d1082 (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
 "Debug info and other statistics fixes and related enhancements"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/numa: Fix numa balancing stats in /proc/pid/sched
  sched/numa: Show numa_group ID in /proc/sched_debug task listings
  sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h
  sched/stat: Expose /proc/pid/schedstat if CONFIG_SCHED_INFO=y
  sched/stat: Simplify the sched_info accounting dependency
-rw-r--r-- fs/proc/base.c 11
-rw-r--r-- include/linux/sched.h 8
-rw-r--r-- init/Kconfig 1
-rw-r--r-- kernel/sched/core.c 2
-rw-r--r-- kernel/sched/debug.c 40
-rw-r--r-- kernel/sched/fair.c 22
-rw-r--r-- kernel/sched/sched.h 13
-rw-r--r-- kernel/sched/stats.h 4
-rw-r--r-- lib/Kconfig.debug 5
9 files changed, 71 insertions, 35 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1d540b3f226f..87782e874b6a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -491,14 +491,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
491} 491}
492#endif 492#endif
493 493
494#ifdef CONFIG_SCHEDSTATS 494#ifdef CONFIG_SCHED_INFO
495/* 495/*
496 * Provides /proc/PID/schedstat 496 * Provides /proc/PID/schedstat
497 */ 497 */
498static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 498static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
499 struct pid *pid, struct task_struct *task) 499 struct pid *pid, struct task_struct *task)
500{ 500{
501 seq_printf(m, "%llu %llu %lu\n", 501 if (unlikely(!sched_info_on()))
502 seq_printf(m, "0 0 0\n");
503 else
504 seq_printf(m, "%llu %llu %lu\n",
502 (unsigned long long)task->se.sum_exec_runtime, 505 (unsigned long long)task->se.sum_exec_runtime,
503 (unsigned long long)task->sched_info.run_delay, 506 (unsigned long long)task->sched_info.run_delay,
504 task->sched_info.pcount); 507 task->sched_info.pcount);
@@ -2787,7 +2790,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2787#ifdef CONFIG_STACKTRACE 2790#ifdef CONFIG_STACKTRACE
2788 ONE("stack", S_IRUSR, proc_pid_stack), 2791 ONE("stack", S_IRUSR, proc_pid_stack),
2789#endif 2792#endif
2790#ifdef CONFIG_SCHEDSTATS 2793#ifdef CONFIG_SCHED_INFO
2791 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 2794 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2792#endif 2795#endif
2793#ifdef CONFIG_LATENCYTOP 2796#ifdef CONFIG_LATENCYTOP
@@ -3135,7 +3138,7 @@ static const struct pid_entry tid_base_stuff[] = {
3135#ifdef CONFIG_STACKTRACE 3138#ifdef CONFIG_STACKTRACE
3136 ONE("stack", S_IRUSR, proc_pid_stack), 3139 ONE("stack", S_IRUSR, proc_pid_stack),
3137#endif 3140#endif
3138#ifdef CONFIG_SCHEDSTATS 3141#ifdef CONFIG_SCHED_INFO
3139 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3142 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
3140#endif 3143#endif
3141#ifdef CONFIG_LATENCYTOP 3144#ifdef CONFIG_LATENCYTOP
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8aa4a251742f..ae21f1591615 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -192,8 +192,6 @@ struct task_group;
192#ifdef CONFIG_SCHED_DEBUG 192#ifdef CONFIG_SCHED_DEBUG
193extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); 193extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
194extern void proc_sched_set_task(struct task_struct *p); 194extern void proc_sched_set_task(struct task_struct *p);
195extern void
196print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
197#endif 195#endif
198 196
199/* 197/*
@@ -838,7 +836,7 @@ extern struct user_struct root_user;
838struct backing_dev_info; 836struct backing_dev_info;
839struct reclaim_state; 837struct reclaim_state;
840 838
841#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 839#ifdef CONFIG_SCHED_INFO
842struct sched_info { 840struct sched_info {
843 /* cumulative counters */ 841 /* cumulative counters */
844 unsigned long pcount; /* # of times run on this cpu */ 842 unsigned long pcount; /* # of times run on this cpu */
@@ -848,7 +846,7 @@ struct sched_info {
848 unsigned long long last_arrival,/* when we last ran on a cpu */ 846 unsigned long long last_arrival,/* when we last ran on a cpu */
849 last_queued; /* when we were last queued to run */ 847 last_queued; /* when we were last queued to run */
850}; 848};
851#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ 849#endif /* CONFIG_SCHED_INFO */
852 850
853#ifdef CONFIG_TASK_DELAY_ACCT 851#ifdef CONFIG_TASK_DELAY_ACCT
854struct task_delay_info { 852struct task_delay_info {
@@ -1397,7 +1395,7 @@ struct task_struct {
1397 int rcu_tasks_idle_cpu; 1395 int rcu_tasks_idle_cpu;
1398#endif /* #ifdef CONFIG_TASKS_RCU */ 1396#endif /* #ifdef CONFIG_TASKS_RCU */
1399 1397
1400#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1398#ifdef CONFIG_SCHED_INFO
1401 struct sched_info sched_info; 1399 struct sched_info sched_info;
1402#endif 1400#endif
1403 1401
diff --git a/init/Kconfig b/init/Kconfig
index 53c41a84dd03..af09b4fb43d2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -435,6 +435,7 @@ config TASKSTATS
435config TASK_DELAY_ACCT 435config TASK_DELAY_ACCT
436 bool "Enable per-task delay accounting" 436 bool "Enable per-task delay accounting"
437 depends on TASKSTATS 437 depends on TASKSTATS
438 select SCHED_INFO
438 help 439 help
439 Collect information on time spent by a task waiting for system 440 Collect information on time spent by a task waiting for system
440 resources like cpu, synchronous block I/O completion and swapping 441 resources like cpu, synchronous block I/O completion and swapping
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b803e1b8ab0c..5caa029dec5d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2164,7 +2164,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2164 set_task_cpu(p, cpu); 2164 set_task_cpu(p, cpu);
2165 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 2165 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2166 2166
2167#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2167#ifdef CONFIG_SCHED_INFO
2168 if (likely(sched_info_on())) 2168 if (likely(sched_info_on()))
2169 memset(&p->sched_info, 0, sizeof(p->sched_info)); 2169 memset(&p->sched_info, 0, sizeof(p->sched_info));
2170#endif 2170#endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 315c68e015d9..4222ec50ab88 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
142 0LL, 0L); 142 0LL, 0L);
143#endif 143#endif
144#ifdef CONFIG_NUMA_BALANCING 144#ifdef CONFIG_NUMA_BALANCING
145 SEQ_printf(m, " %d", task_node(p)); 145 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
146#endif 146#endif
147#ifdef CONFIG_CGROUP_SCHED 147#ifdef CONFIG_CGROUP_SCHED
148 SEQ_printf(m, " %s", task_group_path(task_group(p))); 148 SEQ_printf(m, " %s", task_group_path(task_group(p)));
@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs);
517 SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) 517 SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
518 518
519 519
520#ifdef CONFIG_NUMA_BALANCING
521void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
522 unsigned long tpf, unsigned long gsf, unsigned long gpf)
523{
524 SEQ_printf(m, "numa_faults node=%d ", node);
525 SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
526 SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
527}
528#endif
529
530
520static void sched_show_numa(struct task_struct *p, struct seq_file *m) 531static void sched_show_numa(struct task_struct *p, struct seq_file *m)
521{ 532{
522#ifdef CONFIG_NUMA_BALANCING 533#ifdef CONFIG_NUMA_BALANCING
523 struct mempolicy *pol; 534 struct mempolicy *pol;
524 int node, i;
525 535
526 if (p->mm) 536 if (p->mm)
527 P(mm->numa_scan_seq); 537 P(mm->numa_scan_seq);
@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
533 mpol_get(pol); 543 mpol_get(pol);
534 task_unlock(p); 544 task_unlock(p);
535 545
536 SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0)); 546 P(numa_pages_migrated);
537 547 P(numa_preferred_nid);
538 for_each_online_node(node) { 548 P(total_numa_faults);
539 for (i = 0; i < 2; i++) { 549 SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
540 unsigned long nr_faults = -1; 550 task_node(p), task_numa_group_id(p));
541 int cpu_current, home_node; 551 show_numa_stats(p, m);
542
543 if (p->numa_faults)
544 nr_faults = p->numa_faults[2*node + i];
545
546 cpu_current = !i ? (task_node(p) == node) :
547 (pol && node_isset(node, pol->v.nodes));
548
549 home_node = (p->numa_preferred_nid == node);
550
551 SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
552 i, node, cpu_current, home_node, nr_faults);
553 }
554 }
555
556 mpol_put(pol); 552 mpol_put(pol);
557#endif 553#endif
558} 554}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3d57cc0ca0a6..65c8f3ebdc3c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu)
8473 print_cfs_rq(m, cpu, cfs_rq); 8473 print_cfs_rq(m, cpu, cfs_rq);
8474 rcu_read_unlock(); 8474 rcu_read_unlock();
8475} 8475}
8476#endif 8476
8477#ifdef CONFIG_NUMA_BALANCING
8478void show_numa_stats(struct task_struct *p, struct seq_file *m)
8479{
8480 int node;
8481 unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
8482
8483 for_each_online_node(node) {
8484 if (p->numa_faults) {
8485 tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
8486 tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
8487 }
8488 if (p->numa_group) {
8489 gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
8490 gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
8491 }
8492 print_numa_stats(m, node, tsf, tpf, gsf, gpf);
8493 }
8494}
8495#endif /* CONFIG_NUMA_BALANCING */
8496#endif /* CONFIG_SCHED_DEBUG */
8477 8497
8478__init void init_sched_fair_class(void) 8498__init void init_sched_fair_class(void)
8479{ 8499{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 885889190a1f..84d48790bb6d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1689,9 +1689,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1689 1689
1690extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); 1690extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
1691extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); 1691extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
1692
1693#ifdef CONFIG_SCHED_DEBUG
1692extern void print_cfs_stats(struct seq_file *m, int cpu); 1694extern void print_cfs_stats(struct seq_file *m, int cpu);
1693extern void print_rt_stats(struct seq_file *m, int cpu); 1695extern void print_rt_stats(struct seq_file *m, int cpu);
1694extern void print_dl_stats(struct seq_file *m, int cpu); 1696extern void print_dl_stats(struct seq_file *m, int cpu);
1697extern void
1698print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
1699
1700#ifdef CONFIG_NUMA_BALANCING
1701extern void
1702show_numa_stats(struct task_struct *p, struct seq_file *m);
1703extern void
1704print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
1705 unsigned long tpf, unsigned long gsf, unsigned long gpf);
1706#endif /* CONFIG_NUMA_BALANCING */
1707#endif /* CONFIG_SCHED_DEBUG */
1695 1708
1696extern void init_cfs_rq(struct cfs_rq *cfs_rq); 1709extern void init_cfs_rq(struct cfs_rq *cfs_rq);
1697extern void init_rt_rq(struct rt_rq *rt_rq); 1710extern void init_rt_rq(struct rt_rq *rt_rq);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 077ebbd5e10f..b0fbc7632de5 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
47# define schedstat_set(var, val) do { } while (0) 47# define schedstat_set(var, val) do { } while (0)
48#endif 48#endif
49 49
50#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 50#ifdef CONFIG_SCHED_INFO
51static inline void sched_info_reset_dequeued(struct task_struct *t) 51static inline void sched_info_reset_dequeued(struct task_struct *t)
52{ 52{
53 t->sched_info.last_queued = 0; 53 t->sched_info.last_queued = 0;
@@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq,
156#define sched_info_depart(rq, t) do { } while (0) 156#define sched_info_depart(rq, t) do { } while (0)
157#define sched_info_arrive(rq, next) do { } while (0) 157#define sched_info_arrive(rq, next) do { } while (0)
158#define sched_info_switch(rq, t, next) do { } while (0) 158#define sched_info_switch(rq, t, next) do { } while (0)
159#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ 159#endif /* CONFIG_SCHED_INFO */
160 160
161/* 161/*
162 * The following are functions that support scheduler-internal time accounting. 162 * The following are functions that support scheduler-internal time accounting.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b908048f8d6a..e2894b23efb6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -841,9 +841,14 @@ config SCHED_DEBUG
841 that can help debug the scheduler. The runtime overhead of this 841 that can help debug the scheduler. The runtime overhead of this
842 option is minimal. 842 option is minimal.
843 843
844config SCHED_INFO
845 bool
846 default n
847
844config SCHEDSTATS 848config SCHEDSTATS
845 bool "Collect scheduler statistics" 849 bool "Collect scheduler statistics"
846 depends on DEBUG_KERNEL && PROC_FS 850 depends on DEBUG_KERNEL && PROC_FS
851 select SCHED_INFO
847 help 852 help
848 If you say Y here, additional code will be inserted into the 853 If you say Y here, additional code will be inserted into the
849 scheduler and related routines to collect statistics about 854 scheduler and related routines to collect statistics about