path: root/mm/vmstat.c
author     Christoph Lameter <cl@linux.com>    2010-12-14 11:28:46 -0500
committer  Tejun Heo <tj@kernel.org>           2010-12-18 09:54:49 -0500
commit     7c83912062c801738d7d19acaf8f7fec25ea663c (patch)
tree       52b696a502b871da55fc877ddd8d0e4a271511ad /mm/vmstat.c
parent     20b876918c065818b3574a426d418f68b4f8ad19 (diff)
vmstat: Use per cpu atomics to avoid interrupt disable / enable
Currently the operations to increment vm counters must disable interrupts in order to not mess up their housekeeping of counters. So use this_cpu_cmpxchg() to avoid that overhead.

Since we can no longer count on preemption being disabled, we still have some minor issues: the fetching of the counter thresholds is racy, and a threshold from another cpu may be applied if we happen to be rescheduled on another cpu. However, the following vmstat operation will then bring the counter back under the threshold limit.

The operations for __xxx_zone_state are not changed, since the caller has taken care of the synchronization needs (and therefore their cycle count is even lower than the optimized irq-disable version provided here).

The optimization using this_cpu_cmpxchg is only used if the arch supports efficient this_cpu ops (must have CONFIG_CMPXCHG_LOCAL set).

The use of this_cpu_cmpxchg reduces the cycle count for the counter operations by 80% (inc_zone_page_state goes from 170 cycles to 32).

Signed-off-by: Christoph Lameter <cl@linux.com>
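For readers who want to see the pattern in isolation, below is a minimal user-space sketch of the idea described above: a small per-cpu style diff is updated with a compare-and-exchange loop instead of disabling interrupts, and the accumulated overflow is folded into a global counter once a threshold is overstepped. The names (mod_count, cpu_diff, global_count, THRESHOLD) and the use of C11 atomics are illustrative stand-ins for the kernel's this_cpu_cmpxchg() machinery, not part of the patch.

/* Illustrative analogue only, NOT kernel code. Build with: gcc -std=c11 demo.c */
#include <stdatomic.h>
#include <stdio.h>

#define THRESHOLD 32

static atomic_long global_count;	/* analogue of the zone counter      */
static atomic_long cpu_diff;		/* analogue of pcp->vm_stat_diff[it] */

static void mod_count(long delta, int overstep_mode)
{
	long o, n, z;

	do {
		z = 0;				/* amount to fold into the global counter */
		o = atomic_load(&cpu_diff);	/* current small diff */
		n = o + delta;

		if (n > THRESHOLD || n < -THRESHOLD) {
			long os = overstep_mode * (THRESHOLD >> 1);

			z = n + os;		/* overflow goes to the global counter */
			n = -os;		/* leave headroom for further updates  */
		}
		/* retry if another update slipped in between the load and the CAS */
	} while (!atomic_compare_exchange_weak(&cpu_diff, &o, n));

	if (z)
		atomic_fetch_add(&global_count, z);
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		mod_count(1, 1);

	printf("global=%ld diff=%ld\n",
	       atomic_load(&global_count), atomic_load(&cpu_diff));
	return 0;
}

In the kernel the per-cpu slot is private to one cpu, so the loop only has to guard against interrupts and migration on that cpu; the user-space sketch uses a shared atomic purely to make the retry loop observable.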
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--  mm/vmstat.c  101
1 file changed, 87 insertions, 14 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f9a7bc89fd10..7329eb8a08aa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -185,20 +185,6 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 EXPORT_SYMBOL(__mod_zone_page_state);
 
 /*
- * For an unknown interrupt state
- */
-void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
-					int delta)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__mod_zone_page_state(zone, item, delta);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_zone_page_state);
-
-/*
  * Optimized increment and decrement functions.
  *
  * These are only for a single page and therefore can take a struct page *
@@ -265,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+/*
+ * If we have cmpxchg_local support then we do not need to incur the overhead
+ * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
+ *
+ * mod_state() modifies the zone counter state through atomic per cpu
+ * operations.
+ *
+ * Overstep mode specifies how overstep should be handled:
+ *     0       No overstepping
+ *     1       Overstepping half of threshold
+ *     -1      Overstepping minus half of threshold
+ */
+static inline void mod_state(struct zone *zone,
+			enum zone_stat_item item, int delta, int overstep_mode)
+{
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	s8 __percpu *p = pcp->vm_stat_diff + item;
+	long o, n, t, z;
+
+	do {
+		z = 0;	/* overflow to zone counters */
+
+		/*
+		 * The fetching of the stat_threshold is racy. We may apply
+		 * a counter threshold to the wrong cpu if we get
+		 * rescheduled while executing here. However, the following
+		 * will apply the threshold again and therefore bring the
+		 * counter under the threshold.
+		 */
+		t = this_cpu_read(pcp->stat_threshold);
+
+		o = this_cpu_read(*p);
+		n = delta + o;
+
+		if (n > t || n < -t) {
+			int os = overstep_mode * (t >> 1);
+
+			/* Overflow must be added to zone counters */
+			z = n + os;
+			n = -os;
+		}
+	} while (this_cpu_cmpxchg(*p, o, n) != o);
+
+	if (z)
+		zone_page_state_add(z, zone, item);
+}
+
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+					int delta)
+{
+	mod_state(zone, item, delta, 0);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+	mod_state(zone, item, 1, 1);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	mod_state(page_zone(page), item, 1, 1);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	mod_state(page_zone(page), item, -1, -1);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+#else
+/*
+ * Use interrupt disable to serialize counter updates
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+					int delta)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_zone_page_state(zone, item, delta);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	unsigned long flags;
@@ -295,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(dec_zone_page_state);
+#endif
 
 /*
  * Update the zone counters for one cpu.
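
As a worked example of the overstep handling in mod_state() above (numbers chosen for illustration, not taken from the patch): with a stat_threshold t of 32, a current per-cpu diff o of 30 and a delta of 5, n becomes 35 and exceeds t. With overstep_mode 1 the code computes os = t >> 1 = 16, folds z = 35 + 16 = 51 into the zone counter via zone_page_state_add() and resets the per-cpu diff to n = -16. The zone counter is now 16 ahead of the exact sum, and the negative per-cpu diff both compensates for that and gives subsequent increments extra room before the threshold is hit again.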