diff options
author | Nathan Zimmer <nzimmer@sgi.com> | 2013-02-21 18:15:08 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-02-22 04:27:24 -0500 |
commit | cb152ff26717961b10d0888cd983ba284cb99cd1 (patch) | |
tree | fa23de9f176321a54e60529e21389f1ac930bc30 | |
parent | 1c3e826482ab698e418c7a894440e62c76aac893 (diff) |
sched: Fix /proc/sched_stat failure on very very large systems
On systems with 4096 cores, doing a cat /proc/sched_stat fails
because we are trying to push all the data into a single kmalloc
buffer.
The issue is that on these very large machines all the data will
not fit in 4 MB.
A better solution is to not use the single_open() mechanism but
to provide our own seq_operations.
The output should be identical to the previous version and thus
should not need a version number change.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
[ Fix memleak]
[ Fix spello in comment]
[ Fix warnings]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/sched/stats.c | 79 |
1 files changed, 59 insertions, 20 deletions
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 903ffa9e8872..e036eda1a9c9 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c | |||
@@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
21 | if (mask_str == NULL) | 21 | if (mask_str == NULL) |
22 | return -ENOMEM; | 22 | return -ENOMEM; |
23 | 23 | ||
24 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | 24 | if (v == (void *)1) { |
25 | seq_printf(seq, "timestamp %lu\n", jiffies); | 25 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); |
26 | for_each_online_cpu(cpu) { | 26 | seq_printf(seq, "timestamp %lu\n", jiffies); |
27 | struct rq *rq = cpu_rq(cpu); | 27 | } else { |
28 | struct rq *rq; | ||
28 | #ifdef CONFIG_SMP | 29 | #ifdef CONFIG_SMP |
29 | struct sched_domain *sd; | 30 | struct sched_domain *sd; |
30 | int dcount = 0; | 31 | int dcount = 0; |
31 | #endif | 32 | #endif |
33 | cpu = (unsigned long)(v - 2); | ||
34 | rq = cpu_rq(cpu); | ||
32 | 35 | ||
33 | /* runqueue-specific stats */ | 36 | /* runqueue-specific stats */ |
34 | seq_printf(seq, | 37 | seq_printf(seq, |
@@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
77 | return 0; | 80 | return 0; |
78 | } | 81 | } |
79 | 82 | ||
80 | static int schedstat_open(struct inode *inode, struct file *file) | 83 | /* |
84 | * This iterator needs some explanation. | ||
85 | * It returns 1 for the header position. | ||
86 | * This means 2 is cpu 0. | ||
87 | * In a hotplugged system some cpus, including cpu 0, may be missing so we have | ||
88 | * to use cpumask_* to iterate over the cpus. | ||
89 | */ | ||
90 | static void *schedstat_start(struct seq_file *file, loff_t *offset) | ||
81 | { | 91 | { |
82 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | 92 | unsigned long n = *offset; |
83 | char *buf = kmalloc(size, GFP_KERNEL); | ||
84 | struct seq_file *m; | ||
85 | int res; | ||
86 | 93 | ||
87 | if (!buf) | 94 | if (n == 0) |
88 | return -ENOMEM; | 95 | return (void *) 1; |
89 | res = single_open(file, show_schedstat, NULL); | 96 | |
90 | if (!res) { | 97 | n--; |
91 | m = file->private_data; | 98 | |
92 | m->buf = buf; | 99 | if (n > 0) |
93 | m->size = size; | 100 | n = cpumask_next(n - 1, cpu_online_mask); |
94 | } else | 101 | else |
95 | kfree(buf); | 102 | n = cpumask_first(cpu_online_mask); |
96 | return res; | 103 | |
104 | *offset = n + 1; | ||
105 | |||
106 | if (n < nr_cpu_ids) | ||
107 | return (void *)(unsigned long)(n + 2); | ||
108 | return NULL; | ||
109 | } | ||
110 | |||
111 | static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) | ||
112 | { | ||
113 | (*offset)++; | ||
114 | return schedstat_start(file, offset); | ||
115 | } | ||
116 | |||
117 | static void schedstat_stop(struct seq_file *file, void *data) | ||
118 | { | ||
119 | } | ||
120 | |||
121 | static const struct seq_operations schedstat_sops = { | ||
122 | .start = schedstat_start, | ||
123 | .next = schedstat_next, | ||
124 | .stop = schedstat_stop, | ||
125 | .show = show_schedstat, | ||
126 | }; | ||
127 | |||
128 | static int schedstat_open(struct inode *inode, struct file *file) | ||
129 | { | ||
130 | return seq_open(file, &schedstat_sops); | ||
97 | } | 131 | } |
98 | 132 | ||
133 | static int schedstat_release(struct inode *inode, struct file *file) | ||
134 | { | ||
135 | return 0; | ||
136 | }; | ||
137 | |||
99 | static const struct file_operations proc_schedstat_operations = { | 138 | static const struct file_operations proc_schedstat_operations = { |
100 | .open = schedstat_open, | 139 | .open = schedstat_open, |
101 | .read = seq_read, | 140 | .read = seq_read, |
102 | .llseek = seq_lseek, | 141 | .llseek = seq_lseek, |
103 | .release = single_release, | 142 | .release = schedstat_release, |
104 | }; | 143 | }; |
105 | 144 | ||
106 | static int __init proc_schedstat_init(void) | 145 | static int __init proc_schedstat_init(void) |