aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-01-07 20:02:58 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-07 20:02:58 -0500
commit 72eb6a791459c87a0340318840bb3bd9252b627b (patch)
tree 3bfb8ad99f9c7e511f37f72d57b56a2cea06d753 /mm
parent 23d69b09b78c4876e134f104a3814c30747c53f1 (diff)
parent 55ee4ef30241a62b700f79517e6d5ef2ddbefa67 (diff)
Merge branch 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (30 commits)
  gameport: use this_cpu_read instead of lookup
  x86: udelay: Use this_cpu_read to avoid address calculation
  x86: Use this_cpu_inc_return for nmi counter
  x86: Replace uses of current_cpu_data with this_cpu ops
  x86: Use this_cpu_ops to optimize code
  vmstat: User per cpu atomics to avoid interrupt disable / enable
  irq_work: Use per cpu atomics instead of regular atomics
  cpuops: Use cmpxchg for xchg to avoid lock semantics
  x86: this_cpu_cmpxchg and this_cpu_xchg operations
  percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support
  percpu,x86: relocate this_cpu_add_return() and friends
  connector: Use this_cpu operations
  xen: Use this_cpu_inc_return
  taskstats: Use this_cpu_ops
  random: Use this_cpu_inc_return
  fs: Use this_cpu_inc_return in buffer.c
  highmem: Use this_cpu_xx_return() operations
  vmstat: Use this_cpu_inc_return for vm statistics
  x86: Support for this_cpu_add, sub, dec, inc_return
  percpu: Generic support for this_cpu_add, sub, dec, inc_return
  ...

Fixed up conflicts: in arch/x86/kernel/{apic/nmi.c, apic/x2apic_uv_x.c, process.c} as per Tejun.
Diffstat (limited to 'mm')
-rw-r--r-- mm/percpu.c 8
-rw-r--r-- mm/slab.c 6
-rw-r--r-- mm/vmstat.c 149
3 files changed, 118 insertions, 45 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 02ba91230b99..3dd4984bdef8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -293,12 +293,8 @@ static void *pcpu_mem_alloc(size_t size)
293 293
294 if (size <= PAGE_SIZE) 294 if (size <= PAGE_SIZE)
295 return kzalloc(size, GFP_KERNEL); 295 return kzalloc(size, GFP_KERNEL);
296 else { 296 else
297 void *ptr = vmalloc(size); 297 return vzalloc(size);
298 if (ptr)
299 memset(ptr, 0, size);
300 return ptr;
301 }
302} 298}
303 299
304/** 300/**
diff --git a/mm/slab.c b/mm/slab.c
index 39e92c0e6273..e9f92987954a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -829,12 +829,12 @@ static void init_reap_node(int cpu)
829 829
830static void next_reap_node(void) 830static void next_reap_node(void)
831{ 831{
832 int node = __get_cpu_var(slab_reap_node); 832 int node = __this_cpu_read(slab_reap_node);
833 833
834 node = next_node(node, node_online_map); 834 node = next_node(node, node_online_map);
835 if (unlikely(node >= MAX_NUMNODES)) 835 if (unlikely(node >= MAX_NUMNODES))
836 node = first_node(node_online_map); 836 node = first_node(node_online_map);
837 __get_cpu_var(slab_reap_node) = node; 837 __this_cpu_write(slab_reap_node, node);
838} 838}
839 839
840#else 840#else
@@ -1012,7 +1012,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
1012 */ 1012 */
1013static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) 1013static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1014{ 1014{
1015 int node = __get_cpu_var(slab_reap_node); 1015 int node = __this_cpu_read(slab_reap_node);
1016 1016
1017 if (l3->alien) { 1017 if (l3->alien) {
1018 struct array_cache *ac = l3->alien[node]; 1018 struct array_cache *ac = l3->alien[node];
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 33c33e7a0f9b..312d728976f1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -167,36 +167,24 @@ static void refresh_zone_stat_thresholds(void)
167void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 167void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
168 int delta) 168 int delta)
169{ 169{
170 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); 170 struct per_cpu_pageset __percpu *pcp = zone->pageset;
171 171 s8 __percpu *p = pcp->vm_stat_diff + item;
172 s8 *p = pcp->vm_stat_diff + item;
173 long x; 172 long x;
173 long t;
174
175 x = delta + __this_cpu_read(*p);
174 176
175 x = delta + *p; 177 t = __this_cpu_read(pcp->stat_threshold);
176 178
177 if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { 179 if (unlikely(x > t || x < -t)) {
178 zone_page_state_add(x, zone, item); 180 zone_page_state_add(x, zone, item);
179 x = 0; 181 x = 0;
180 } 182 }
181 *p = x; 183 __this_cpu_write(*p, x);
182} 184}
183EXPORT_SYMBOL(__mod_zone_page_state); 185EXPORT_SYMBOL(__mod_zone_page_state);
184 186
185/* 187/*
186 * For an unknown interrupt state
187 */
188void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
189 int delta)
190{
191 unsigned long flags;
192
193 local_irq_save(flags);
194 __mod_zone_page_state(zone, item, delta);
195 local_irq_restore(flags);
196}
197EXPORT_SYMBOL(mod_zone_page_state);
198
199/*
200 * Optimized increment and decrement functions. 188 * Optimized increment and decrement functions.
201 * 189 *
202 * These are only for a single page and therefore can take a struct page * 190 * These are only for a single page and therefore can take a struct page *
@@ -221,16 +209,17 @@ EXPORT_SYMBOL(mod_zone_page_state);
221 */ 209 */
222void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 210void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
223{ 211{
224 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); 212 struct per_cpu_pageset __percpu *pcp = zone->pageset;
225 s8 *p = pcp->vm_stat_diff + item; 213 s8 __percpu *p = pcp->vm_stat_diff + item;
214 s8 v, t;
226 215
227 (*p)++; 216 v = __this_cpu_inc_return(*p);
217 t = __this_cpu_read(pcp->stat_threshold);
218 if (unlikely(v > t)) {
219 s8 overstep = t >> 1;
228 220
229 if (unlikely(*p > pcp->stat_threshold)) { 221 zone_page_state_add(v + overstep, zone, item);
230 int overstep = pcp->stat_threshold / 2; 222 __this_cpu_write(*p, -overstep);
231
232 zone_page_state_add(*p + overstep, zone, item);
233 *p = -overstep;
234 } 223 }
235} 224}
236 225
@@ -242,16 +231,17 @@ EXPORT_SYMBOL(__inc_zone_page_state);
242 231
243void __dec_zone_state(struct zone *zone, enum zone_stat_item item) 232void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
244{ 233{
245 struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); 234 struct per_cpu_pageset __percpu *pcp = zone->pageset;
246 s8 *p = pcp->vm_stat_diff + item; 235 s8 __percpu *p = pcp->vm_stat_diff + item;
247 236 s8 v, t;
248 (*p)--;
249 237
250 if (unlikely(*p < - pcp->stat_threshold)) { 238 v = __this_cpu_dec_return(*p);
251 int overstep = pcp->stat_threshold / 2; 239 t = __this_cpu_read(pcp->stat_threshold);
240 if (unlikely(v < - t)) {
241 s8 overstep = t >> 1;
252 242
253 zone_page_state_add(*p - overstep, zone, item); 243 zone_page_state_add(v - overstep, zone, item);
254 *p = overstep; 244 __this_cpu_write(*p, overstep);
255 } 245 }
256} 246}
257 247
@@ -261,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
261} 251}
262EXPORT_SYMBOL(__dec_zone_page_state); 252EXPORT_SYMBOL(__dec_zone_page_state);
263 253
254#ifdef CONFIG_CMPXCHG_LOCAL
255/*
256 * If we have cmpxchg_local support then we do not need to incur the overhead
257 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
258 *
259 * mod_state() modifies the zone counter state through atomic per cpu
260 * operations.
261 *
262 * Overstep mode specifies how overstep should be handled:
263 * 0 No overstepping
264 * 1 Overstepping half of threshold
265 * -1 Overstepping minus half of threshold
266*/
267static inline void mod_state(struct zone *zone,
268 enum zone_stat_item item, int delta, int overstep_mode)
269{
270 struct per_cpu_pageset __percpu *pcp = zone->pageset;
271 s8 __percpu *p = pcp->vm_stat_diff + item;
272 long o, n, t, z;
273
274 do {
275 z = 0; /* overflow to zone counters */
276
277 /*
278 * The fetching of the stat_threshold is racy. We may apply
279 * a counter threshold to the wrong cpu if we get
280 * rescheduled while executing here. However, the following
281 * will apply the threshold again and therefore bring the
282 * counter under the threshold.
283 */
284 t = this_cpu_read(pcp->stat_threshold);
285
286 o = this_cpu_read(*p);
287 n = delta + o;
288
289 if (n > t || n < -t) {
290 int os = overstep_mode * (t >> 1) ;
291
292 /* Overflow must be added to zone counters */
293 z = n + os;
294 n = -os;
295 }
296 } while (this_cpu_cmpxchg(*p, o, n) != o);
297
298 if (z)
299 zone_page_state_add(z, zone, item);
300}
301
302void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
303 int delta)
304{
305 mod_state(zone, item, delta, 0);
306}
307EXPORT_SYMBOL(mod_zone_page_state);
308
309void inc_zone_state(struct zone *zone, enum zone_stat_item item)
310{
311 mod_state(zone, item, 1, 1);
312}
313
314void inc_zone_page_state(struct page *page, enum zone_stat_item item)
315{
316 mod_state(page_zone(page), item, 1, 1);
317}
318EXPORT_SYMBOL(inc_zone_page_state);
319
320void dec_zone_page_state(struct page *page, enum zone_stat_item item)
321{
322 mod_state(page_zone(page), item, -1, -1);
323}
324EXPORT_SYMBOL(dec_zone_page_state);
325#else
326/*
327 * Use interrupt disable to serialize counter updates
328 */
329void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
330 int delta)
331{
332 unsigned long flags;
333
334 local_irq_save(flags);
335 __mod_zone_page_state(zone, item, delta);
336 local_irq_restore(flags);
337}
338EXPORT_SYMBOL(mod_zone_page_state);
339
264void inc_zone_state(struct zone *zone, enum zone_stat_item item) 340void inc_zone_state(struct zone *zone, enum zone_stat_item item)
265{ 341{
266 unsigned long flags; 342 unsigned long flags;
@@ -291,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
291 local_irq_restore(flags); 367 local_irq_restore(flags);
292} 368}
293EXPORT_SYMBOL(dec_zone_page_state); 369EXPORT_SYMBOL(dec_zone_page_state);
370#endif
294 371
295/* 372/*
296 * Update the zone counters for one cpu. 373 * Update the zone counters for one cpu.