aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/sn
diff options
context:
space:
mode:
author: Christoph Lameter <cl@linux.com> 2014-08-17 13:30:47 -0400
committer: Tejun Heo <tj@kernel.org> 2014-08-26 13:45:52 -0400
commit: 6065a244a039a23d933e4b803a4e052da2849208 (patch)
tree: 7c3db16385c805e0d0164b912abb2ae17408c779 /arch/ia64/sn
parent: 0bf7fcf155160fd483af7ffdc50efd4be96f1c96 (diff)
ia64: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset.

Other use cases are for storing and retrieving data from the current processor's percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment.

__get_cpu_var() is defined as:

    #define __get_cpu_var(var) (*this_cpu_ptr(&(var)))

__get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation.

this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables.

This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated.

At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too.

The patch set includes passes over all arches as well. Once these operations are used throughout, specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access by, e.g., using a global register that may be set to the per cpu base.

Transformations done to __get_cpu_var()

1. Determine the address of the percpu instance of the current processor.

    DEFINE_PER_CPU(int, y);
    int *x = &__get_cpu_var(y);

   Converts to

    int *x = this_cpu_ptr(&y);

2. Same as #1 but this time an array structure is involved.

    DEFINE_PER_CPU(int, y[20]);
    int *x = __get_cpu_var(y);

   Converts to

    int *x = this_cpu_ptr(y);

3. Retrieve the content of the current processor's instance of a per cpu variable.
    DEFINE_PER_CPU(int, y);
    int x = __get_cpu_var(y)

   Converts to

    int x = __this_cpu_read(y);

4. Retrieve the content of a percpu struct

    DEFINE_PER_CPU(struct mystruct, y);
    struct mystruct x = __get_cpu_var(y);

   Converts to

    memcpy(&x, this_cpu_ptr(&y), sizeof(x));

5. Assignment to a per cpu variable

    DEFINE_PER_CPU(int, y)
    __get_cpu_var(y) = x;

   Converts to

    __this_cpu_write(y, x);

6. Increment/Decrement etc. of a per cpu variable

    DEFINE_PER_CPU(int, y);
    __get_cpu_var(y)++

   Converts to

    __this_cpu_inc(y)

Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: linux-ia64@vger.kernel.org
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn2_smp.c 28
1 files changed, 14 insertions, 14 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 68c845411624..f9c8d9fc5939 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -134,8 +134,8 @@ sn2_ipi_flush_all_tlb(struct mm_struct *mm)
134 itc = ia64_get_itc(); 134 itc = ia64_get_itc();
135 smp_flush_tlb_cpumask(*mm_cpumask(mm)); 135 smp_flush_tlb_cpumask(*mm_cpumask(mm));
136 itc = ia64_get_itc() - itc; 136 itc = ia64_get_itc() - itc;
137 __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; 137 __this_cpu_add(ptcstats.shub_ipi_flushes_itc_clocks, itc);
138 __get_cpu_var(ptcstats).shub_ipi_flushes++; 138 __this_cpu_inc(ptcstats.shub_ipi_flushes);
139} 139}
140 140
141/** 141/**
@@ -199,14 +199,14 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
199 start += (1UL << nbits); 199 start += (1UL << nbits);
200 } while (start < end); 200 } while (start < end);
201 ia64_srlz_i(); 201 ia64_srlz_i();
202 __get_cpu_var(ptcstats).ptc_l++; 202 __this_cpu_inc(ptcstats.ptc_l);
203 preempt_enable(); 203 preempt_enable();
204 return; 204 return;
205 } 205 }
206 206
207 if (atomic_read(&mm->mm_users) == 1 && mymm) { 207 if (atomic_read(&mm->mm_users) == 1 && mymm) {
208 flush_tlb_mm(mm); 208 flush_tlb_mm(mm);
209 __get_cpu_var(ptcstats).change_rid++; 209 __this_cpu_inc(ptcstats.change_rid);
210 preempt_enable(); 210 preempt_enable();
211 return; 211 return;
212 } 212 }
@@ -250,11 +250,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
250 spin_lock_irqsave(PTC_LOCK(shub1), flags); 250 spin_lock_irqsave(PTC_LOCK(shub1), flags);
251 itc2 = ia64_get_itc(); 251 itc2 = ia64_get_itc();
252 252
253 __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; 253 __this_cpu_add(ptcstats.lock_itc_clocks, itc2 - itc);
254 __get_cpu_var(ptcstats).shub_ptc_flushes++; 254 __this_cpu_inc(ptcstats.shub_ptc_flushes);
255 __get_cpu_var(ptcstats).nodes_flushed += nix; 255 __this_cpu_add(ptcstats.nodes_flushed, nix);
256 if (!mymm) 256 if (!mymm)
257 __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++; 257 __this_cpu_inc(ptcstats.shub_ptc_flushes_not_my_mm);
258 258
259 if (use_cpu_ptcga && !mymm) { 259 if (use_cpu_ptcga && !mymm) {
260 old_rr = ia64_get_rr(start); 260 old_rr = ia64_get_rr(start);
@@ -299,9 +299,9 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
299 299
300done: 300done:
301 itc2 = ia64_get_itc() - itc2; 301 itc2 = ia64_get_itc() - itc2;
302 __get_cpu_var(ptcstats).shub_itc_clocks += itc2; 302 __this_cpu_add(ptcstats.shub_itc_clocks, itc2);
303 if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) 303 if (itc2 > __this_cpu_read(ptcstats.shub_itc_clocks_max))
304 __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; 304 __this_cpu_write(ptcstats.shub_itc_clocks_max, itc2);
305 305
306 if (old_rr) { 306 if (old_rr) {
307 ia64_set_rr(start, old_rr); 307 ia64_set_rr(start, old_rr);
@@ -311,7 +311,7 @@ done:
311 spin_unlock_irqrestore(PTC_LOCK(shub1), flags); 311 spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
312 312
313 if (flush_opt == 1 && deadlock) { 313 if (flush_opt == 1 && deadlock) {
314 __get_cpu_var(ptcstats).deadlocks++; 314 __this_cpu_inc(ptcstats.deadlocks);
315 sn2_ipi_flush_all_tlb(mm); 315 sn2_ipi_flush_all_tlb(mm);
316 } 316 }
317 317
@@ -334,7 +334,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
334 short nasid, i; 334 short nasid, i;
335 unsigned long *piows, zeroval, n; 335 unsigned long *piows, zeroval, n;
336 336
337 __get_cpu_var(ptcstats).deadlocks++; 337 __this_cpu_inc(ptcstats.deadlocks);
338 338
339 piows = (unsigned long *) pda->pio_write_status_addr; 339 piows = (unsigned long *) pda->pio_write_status_addr;
340 zeroval = pda->pio_write_status_val; 340 zeroval = pda->pio_write_status_val;
@@ -349,7 +349,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
349 ptc1 = CHANGE_NASID(nasid, ptc1); 349 ptc1 = CHANGE_NASID(nasid, ptc1);
350 350
351 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); 351 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
352 __get_cpu_var(ptcstats).deadlocks2 += n; 352 __this_cpu_add(ptcstats.deadlocks2, n);
353 } 353 }
354 354
355} 355}