-rw-r--r--  arch/ia64/Kconfig      |   5
-rw-r--r--  include/linux/mmzone.h |   9
-rw-r--r--  include/linux/vmstat.h | 129
-rw-r--r--  mm/page_alloc.c        |   2
-rw-r--r--  mm/slab.c              |   1
-rw-r--r--  mm/vmstat.c            | 218
6 files changed, 359 insertions(+), 5 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index b487e227a1f7..47de9ee6bcd6 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -70,6 +70,11 @@ config DMA_IS_DMA32
         bool
         default y
 
+config DMA_IS_NORMAL
+        bool
+        depends on IA64_SGI_SN2
+        default y
+
 choice
         prompt "System type"
         default IA64_GENERIC
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d6120fa69116..543f9e411563 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -46,6 +46,9 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
+enum zone_stat_item {
+        NR_VM_ZONE_STAT_ITEMS };
+
 struct per_cpu_pages {
         int count;              /* number of pages in the list */
         int high;               /* high watermark, emptying needed */
@@ -55,6 +58,10 @@ struct per_cpu_pages {
 
 struct per_cpu_pageset {
         struct per_cpu_pages pcp[2];    /* 0: hot.  1: cold */
+#ifdef CONFIG_SMP
+        s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+#endif
+
 #ifdef CONFIG_NUMA
         unsigned long numa_hit;         /* allocated in intended node */
         unsigned long numa_miss;        /* allocated in non intended node */
@@ -165,6 +172,8 @@ struct zone {
         /* A count of how many reclaimers are scanning this zone */
         atomic_t                reclaim_in_progress;
 
+        /* Zone statistics */
+        atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];
         /*
          * timestamp (in jiffies) of the last zone reclaim that did not
          * result in freeing of pages. This is used to avoid repeated scans
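Note on the data layout just introduced: enum zone_stat_item is deliberately added empty, so NR_VM_ZONE_STAT_ITEMS is zero and both zone->vm_stat[] and the per-cpu vm_stat_diff[] have zero length until follow-up patches convert individual counters. As a minimal sketch of how the enum is meant to grow, consider the snippet below; the item names are hypothetical placeholders, not part of this patch.

/*
 * Illustrative only: hypothetical counters. Real items are introduced by
 * later patches in the series, always ahead of NR_VM_ZONE_STAT_ITEMS.
 */
enum zone_stat_item {
        NR_EXAMPLE_MAPPED,      /* hypothetical: pages mapped into page tables */
        NR_EXAMPLE_PAGECACHE,   /* hypothetical: page cache pages in this zone */
        NR_VM_ZONE_STAT_ITEMS   /* must stay last: sizes vm_stat[] and vm_stat_diff[] */
};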
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3ca0c1989fc2..3fd5c11e544a 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -3,6 +3,9 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
+#include <linux/config.h>
+#include <linux/mmzone.h>
+#include <asm/atomic.h>
 
 /*
  * Global page accounting. One instance per CPU. Only unsigned longs are
@@ -134,5 +137,129 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
 
 DECLARE_PER_CPU(struct page_state, page_states);
 
-#endif /* _LINUX_VMSTAT_H */
+/*
+ * Zone based page accounting with per cpu differentials.
+ */
+extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+
+static inline void zone_page_state_add(long x, struct zone *zone,
+                                enum zone_stat_item item)
+{
+        atomic_long_add(x, &zone->vm_stat[item]);
+        atomic_long_add(x, &vm_stat[item]);
+}
+
+static inline unsigned long global_page_state(enum zone_stat_item item)
+{
+        long x = atomic_long_read(&vm_stat[item]);
+#ifdef CONFIG_SMP
+        if (x < 0)
+                x = 0;
+#endif
+        return x;
+}
+
+static inline unsigned long zone_page_state(struct zone *zone,
+                                enum zone_stat_item item)
+{
+        long x = atomic_long_read(&zone->vm_stat[item]);
+#ifdef CONFIG_SMP
+        if (x < 0)
+                x = 0;
+#endif
+        return x;
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * Determine the per node value of a stat item. This function
+ * is called frequently in a NUMA machine, so try to be as
+ * frugal as possible.
+ */
+static inline unsigned long node_page_state(int node,
+                                enum zone_stat_item item)
+{
+        struct zone *zones = NODE_DATA(node)->node_zones;
+
+        return
+#ifndef CONFIG_DMA_IS_NORMAL
+#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+                zone_page_state(&zones[ZONE_DMA32], item) +
+#endif
+                zone_page_state(&zones[ZONE_NORMAL], item) +
+#endif
+#ifdef CONFIG_HIGHMEM
+                zone_page_state(&zones[ZONE_HIGHMEM], item) +
+#endif
+                zone_page_state(&zones[ZONE_DMA], item);
+}
+#else
+#define node_page_state(node, item) global_page_state(item)
+#endif
+
+#define __add_zone_page_state(__z, __i, __d)    \
+                __mod_zone_page_state(__z, __i, __d)
+#define __sub_zone_page_state(__z, __i, __d)    \
+                __mod_zone_page_state(__z, __i,-(__d))
+
+#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
+#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
+
+static inline void zap_zone_vm_stats(struct zone *zone)
+{
+        memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
+}
+
+#ifdef CONFIG_SMP
+void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int);
+void __inc_zone_page_state(struct page *, enum zone_stat_item);
+void __dec_zone_page_state(struct page *, enum zone_stat_item);
 
+void mod_zone_page_state(struct zone *, enum zone_stat_item, int);
+void inc_zone_page_state(struct page *, enum zone_stat_item);
+void dec_zone_page_state(struct page *, enum zone_stat_item);
+
+extern void inc_zone_state(struct zone *, enum zone_stat_item);
+
+void refresh_cpu_vm_stats(int);
+void refresh_vm_stats(void);
+
+#else /* CONFIG_SMP */
+
+/*
+ * We do not maintain differentials in a single processor configuration.
+ * The functions directly modify the zone and global counters.
+ */
+static inline void __mod_zone_page_state(struct zone *zone,
+                        enum zone_stat_item item, int delta)
+{
+        zone_page_state_add(delta, zone, item);
+}
+
+static inline void __inc_zone_page_state(struct page *page,
+                        enum zone_stat_item item)
+{
+        atomic_long_inc(&page_zone(page)->vm_stat[item]);
+        atomic_long_inc(&vm_stat[item]);
+}
+
+static inline void __dec_zone_page_state(struct page *page,
+                        enum zone_stat_item item)
+{
+        atomic_long_dec(&page_zone(page)->vm_stat[item]);
+        atomic_long_dec(&vm_stat[item]);
+}
+
+/*
+ * We only use atomic operations to update counters. So there is no need to
+ * disable interrupts.
+ */
+#define inc_zone_page_state __inc_zone_page_state
+#define dec_zone_page_state __dec_zone_page_state
+#define mod_zone_page_state __mod_zone_page_state
+
+static inline void refresh_cpu_vm_stats(int cpu) { }
+static inline void refresh_vm_stats(void) { }
+#endif
+
+#endif /* _LINUX_VMSTAT_H */
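The header above defines the whole caller-facing API. As a quick orientation, here is a hedged usage sketch assuming a hypothetical counter NR_EXAMPLE_MAPPED (no such item exists in this patch): the double-underscore variants are for contexts where interrupts are already disabled, the plain variants save/restore interrupts themselves on SMP, and readers simply sample the zone/global atomics, which may lag by the outstanding per-cpu differentials.

#include <linux/mm.h>
#include <linux/vmstat.h>

/* NR_EXAMPLE_MAPPED is a hypothetical zone_stat_item, used only for illustration. */

static void example_account_page(struct page *page)
{
        /* Interrupts known to be off (e.g. under a spin_lock_irq): cheapest form. */
        __inc_zone_page_state(page, NR_EXAMPLE_MAPPED);
}

static void example_unaccount_page(struct page *page)
{
        /* Unknown interrupt state: this variant disables interrupts on SMP. */
        dec_zone_page_state(page, NR_EXAMPLE_MAPPED);
}

static unsigned long example_report(struct zone *zone)
{
        /* Readers never fold differentials; they may see a slightly stale value. */
        return zone_page_state(zone, NR_EXAMPLE_MAPPED) +
               global_page_state(NR_EXAMPLE_MAPPED);
}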
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 87dc1297fe39..3a877fecc300 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2045,6 +2045,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
                 zone->nr_scan_inactive = 0;
                 zone->nr_active = 0;
                 zone->nr_inactive = 0;
+                zap_zone_vm_stats(zone);
                 atomic_set(&zone->reclaim_in_progress, 0);
                 if (!size)
                         continue;
@@ -2147,6 +2148,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
                 }
 
                 local_irq_enable();
+                refresh_cpu_vm_stats(cpu);
         }
         return NOTIFY_OK;
 }
diff --git a/mm/slab.c b/mm/slab.c
index 233e39d14caf..0c33820038cb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3763,6 +3763,7 @@ next:
         check_irq_on();
         mutex_unlock(&cache_chain_mutex);
         next_reap_node();
+        refresh_cpu_vm_stats(smp_processor_id());
         /* Set up the next iteration */
         schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ad456202ff1a..210f9bbbb04f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -3,10 +3,15 @@
  *
  *  Manages VM statistics
  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  zoned VM statistics
+ *  Copyright (C) 2006 Silicon Graphics, Inc.,
+ *              Christoph Lameter <christoph@lameter.com>
  */
 
 #include <linux/config.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 
 /*
  * Accumulate the page_state information across all CPUs.
@@ -143,6 +148,197 @@ void get_zone_counts(unsigned long *active,
         }
 }
 
+/*
+ * Manage combined zone based / global counters
+ *
+ * vm_stat contains the global counters
+ */
+atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+EXPORT_SYMBOL(vm_stat);
+
+#ifdef CONFIG_SMP
+
+#define STAT_THRESHOLD 32
+
+/*
+ * Determine pointer to currently valid differential byte given a zone and
+ * the item number.
+ *
+ * Preemption must be off
+ */
+static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
+{
+        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
+}
+
+/*
+ * For use when we know that interrupts are disabled.
+ */
+void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+                                int delta)
+{
+        s8 *p;
+        long x;
+
+        p = diff_pointer(zone, item);
+        x = delta + *p;
+
+        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
+                zone_page_state_add(x, zone, item);
+                x = 0;
+        }
+
+        *p = x;
+}
+EXPORT_SYMBOL(__mod_zone_page_state);
+
+/*
+ * For an unknown interrupt state
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+                                int delta)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        __mod_zone_page_state(zone, item, delta);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+/*
+ * Optimized increment and decrement functions.
+ *
+ * These are only for a single page and therefore can take a struct page *
+ * argument instead of struct zone *. This allows the inclusion of the code
+ * generated for page_zone(page) into the optimized functions.
+ *
+ * No overflow check is necessary and therefore the differential can be
+ * incremented or decremented in place which may allow the compilers to
+ * generate better code.
+ *
+ * The increment or decrement is known and therefore one boundary check can
+ * be omitted.
+ *
+ * Some processors have inc/dec instructions that are atomic vs an interrupt.
+ * However, the code must first determine the differential location in a zone
+ * based on the processor number and then inc/dec the counter. There is no
+ * guarantee without disabling preemption that the processor will not change
+ * in between and therefore the atomicity vs. interrupt cannot be exploited
+ * in a useful way here.
+ */
+void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+        struct zone *zone = page_zone(page);
+        s8 *p = diff_pointer(zone, item);
+
+        (*p)++;
+
+        if (unlikely(*p > STAT_THRESHOLD)) {
+                zone_page_state_add(*p, zone, item);
+                *p = 0;
+        }
+}
+EXPORT_SYMBOL(__inc_zone_page_state);
+
+void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+        struct zone *zone = page_zone(page);
+        s8 *p = diff_pointer(zone, item);
+
+        (*p)--;
+
+        if (unlikely(*p < -STAT_THRESHOLD)) {
+                zone_page_state_add(*p, zone, item);
+                *p = 0;
+        }
+}
+EXPORT_SYMBOL(__dec_zone_page_state);
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+        unsigned long flags;
+        struct zone *zone;
+        s8 *p;
+
+        zone = page_zone(page);
+        local_irq_save(flags);
+        p = diff_pointer(zone, item);
+
+        (*p)++;
+
+        if (unlikely(*p > STAT_THRESHOLD)) {
+                zone_page_state_add(*p, zone, item);
+                *p = 0;
+        }
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+        unsigned long flags;
+        struct zone *zone;
+        s8 *p;
+
+        zone = page_zone(page);
+        local_irq_save(flags);
+        p = diff_pointer(zone, item);
+
+        (*p)--;
+
+        if (unlikely(*p < -STAT_THRESHOLD)) {
+                zone_page_state_add(*p, zone, item);
+                *p = 0;
+        }
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+
+/*
+ * Update the zone counters for one cpu.
+ */
+void refresh_cpu_vm_stats(int cpu)
+{
+        struct zone *zone;
+        int i;
+        unsigned long flags;
+
+        for_each_zone(zone) {
+                struct per_cpu_pageset *pcp;
+
+                pcp = zone_pcp(zone, cpu);
+
+                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                        if (pcp->vm_stat_diff[i]) {
+                                local_irq_save(flags);
+                                zone_page_state_add(pcp->vm_stat_diff[i],
+                                        zone, i);
+                                pcp->vm_stat_diff[i] = 0;
+                                local_irq_restore(flags);
+                        }
+        }
+}
+
+static void __refresh_cpu_vm_stats(void *dummy)
+{
+        refresh_cpu_vm_stats(smp_processor_id());
+}
+
+/*
+ * Consolidate all counters.
+ *
+ * Note that the result is less inaccurate but still inaccurate
+ * if concurrent processes are allowed to run.
+ */
+void refresh_vm_stats(void)
+{
+        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
+}
+EXPORT_SYMBOL(refresh_vm_stats);
+
+#endif
+
 #ifdef CONFIG_PROC_FS
 
 #include <linux/seq_file.h>
@@ -204,6 +400,9 @@ struct seq_operations fragmentation_op = {
 };
 
 static char *vmstat_text[] = {
+        /* Zoned VM counters */
+
+        /* Page state */
         "nr_dirty",
         "nr_writeback",
         "nr_unstable",
@@ -297,6 +496,11 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                            zone->nr_scan_active, zone->nr_scan_inactive,
                            zone->spanned_pages,
                            zone->present_pages);
+
+                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                        seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
+                                        zone_page_state(zone, i));
+
                 seq_printf(m,
                            "\n        protection: (%lu",
                            zone->lowmem_reserve[0]);
@@ -368,19 +572,25 @@ struct seq_operations zoneinfo_op = {
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
+        unsigned long *v;
         struct page_state *ps;
+        int i;
 
         if (*pos >= ARRAY_SIZE(vmstat_text))
                 return NULL;
 
-        ps = kmalloc(sizeof(*ps), GFP_KERNEL);
-        m->private = ps;
-        if (!ps)
+        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
+                        + sizeof(*ps), GFP_KERNEL);
+        m->private = v;
+        if (!v)
                 return ERR_PTR(-ENOMEM);
+        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                v[i] = global_page_state(i);
+        ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
         get_full_page_state(ps);
         ps->pgpgin /= 2;                /* sectors -> kbytes */
         ps->pgpgout /= 2;
-        return (unsigned long *)ps + *pos;
+        return v + *pos;
 }
 
 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
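To make the deferral scheme in mm/vmstat.c concrete, the following stand-alone sketch (plain user-space C; every name is invented for illustration) mimics __mod_zone_page_state() and refresh_cpu_vm_stats(): each simulated CPU batches updates in a small signed differential and folds it into the shared counter only once the threshold is crossed, so the shared value is never off by more than cpus * threshold per item.

#include <stdio.h>
#include <stdlib.h>

#define NCPUS           4
#define STAT_THRESHOLD  32

static long shared_count;               /* stands in for the atomic_long_t counters */
static signed char diff[NCPUS];         /* stands in for per_cpu_pageset.vm_stat_diff */

static void mod_count(int cpu, int delta)       /* cf. __mod_zone_page_state() */
{
        int x = diff[cpu] + delta;

        if (x > STAT_THRESHOLD || x < -STAT_THRESHOLD) {
                shared_count += x;              /* fold into the shared counter */
                x = 0;
        }
        diff[cpu] = x;
}

static void refresh_cpu(int cpu)                /* cf. refresh_cpu_vm_stats() */
{
        shared_count += diff[cpu];
        diff[cpu] = 0;
}

int main(void)
{
        int i;

        for (i = 0; i < 100000; i++)
                mod_count(rand() % NCPUS, (rand() % 3) - 1);    /* -1, 0 or +1 */

        printf("before refresh: %ld (error bound per item: %d)\n",
               shared_count, NCPUS * STAT_THRESHOLD);

        for (i = 0; i < NCPUS; i++)
                refresh_cpu(i);
        printf("after refresh:  %ld\n", shared_count);
        return 0;
}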