aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/sn
diff options
context:
space:
mode:
author: Matt Fleming <matt@codeblueprint.co.uk> 2016-05-04 07:17:48 -0400
committer: Tony Luck <tony.luck@intel.com> 2016-05-05 13:29:14 -0400
commit: 0b184a30d0df12f8366ce74bb9a5af2cff1fd3e3 (patch)
tree: 561e838529d75ea69a7f3dabfc8e7a495d303cc3 /arch/ia64/sn
parent: 1bba3ff90842cf55313a64a8a22e6cca0b3fdcb7 (diff)
ia64: Reduce stack usage by iterating over nodemask
GCC complains about sn2_global_tlb_purge() because of the large stack
required by the function,

  arch/ia64/sn/kernel/sn2/sn2_smp.c: In function 'sn2_global_tlb_purge':
  arch/ia64/sn/kernel/sn2/sn2_smp.c:319:1: warning: the frame size of 2176 bytes is larger than 2048 bytes [-Wframe-larger-than=]

2048 bytes of the stack are consumed by the node ID array 'nasids[]'.
But we don't actually need to put the ID array on the stack and can
use nodemask operations.

Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn2_smp.c 35
1 files changed, 23 insertions, 12 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index f9c8d9fc5939..c98dc965fe82 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -54,7 +54,7 @@ sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
54 volatile unsigned long *, unsigned long, 54 volatile unsigned long *, unsigned long,
55 volatile unsigned long *, unsigned long); 55 volatile unsigned long *, unsigned long);
56void 56void
57sn2_ptc_deadlock_recovery(short *, short, short, int, 57sn2_ptc_deadlock_recovery(nodemask_t, short, short, int,
58 volatile unsigned long *, unsigned long, 58 volatile unsigned long *, unsigned long,
59 volatile unsigned long *, unsigned long); 59 volatile unsigned long *, unsigned long);
60 60
@@ -169,7 +169,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
169 int use_cpu_ptcga; 169 int use_cpu_ptcga;
170 volatile unsigned long *ptc0, *ptc1; 170 volatile unsigned long *ptc0, *ptc1;
171 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; 171 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
172 short nasids[MAX_NUMNODES], nix; 172 short nix;
173 nodemask_t nodes_flushed; 173 nodemask_t nodes_flushed;
174 int active, max_active, deadlock, flush_opt = sn2_flush_opt; 174 int active, max_active, deadlock, flush_opt = sn2_flush_opt;
175 175
@@ -218,9 +218,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
218 } 218 }
219 219
220 itc = ia64_get_itc(); 220 itc = ia64_get_itc();
221 nix = 0; 221 nix = nodes_weight(nodes_flushed);
222 for_each_node_mask(cnode, nodes_flushed)
223 nasids[nix++] = cnodeid_to_nasid(cnode);
224 222
225 rr_value = (mm->context << 3) | REGION_NUMBER(start); 223 rr_value = (mm->context << 3) | REGION_NUMBER(start);
226 224
@@ -270,8 +268,10 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
270 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); 268 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
271 deadlock = 0; 269 deadlock = 0;
272 active = 0; 270 active = 0;
273 for (ibegin = 0, i = 0; i < nix; i++) { 271 ibegin = 0;
274 nasid = nasids[i]; 272 i = 0;
273 for_each_node_mask(cnode, nodes_flushed) {
274 nasid = cnodeid_to_nasid(cnode);
275 if (use_cpu_ptcga && unlikely(nasid == mynasid)) { 275 if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
276 ia64_ptcga(start, nbits << 2); 276 ia64_ptcga(start, nbits << 2);
277 ia64_srlz_i(); 277 ia64_srlz_i();
@@ -286,13 +286,14 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
286 if ((deadlock = wait_piowc())) { 286 if ((deadlock = wait_piowc())) {
287 if (flush_opt == 1) 287 if (flush_opt == 1)
288 goto done; 288 goto done;
289 sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); 289 sn2_ptc_deadlock_recovery(nodes_flushed, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
290 if (reset_max_active_on_deadlock()) 290 if (reset_max_active_on_deadlock())
291 max_active = 1; 291 max_active = 1;
292 } 292 }
293 active = 0; 293 active = 0;
294 ibegin = i + 1; 294 ibegin = i + 1;
295 } 295 }
296 i++;
296 } 297 }
297 start += (1UL << nbits); 298 start += (1UL << nbits);
298 } while (start < end); 299 } while (start < end);
@@ -327,11 +328,12 @@ done:
327 */ 328 */
328 329
329void 330void
330sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, 331sn2_ptc_deadlock_recovery(nodemask_t nodes, short ib, short ie, int mynasid,
331 volatile unsigned long *ptc0, unsigned long data0, 332 volatile unsigned long *ptc0, unsigned long data0,
332 volatile unsigned long *ptc1, unsigned long data1) 333 volatile unsigned long *ptc1, unsigned long data1)
333{ 334{
334 short nasid, i; 335 short nasid, i;
336 int cnode;
335 unsigned long *piows, zeroval, n; 337 unsigned long *piows, zeroval, n;
336 338
337 __this_cpu_inc(ptcstats.deadlocks); 339 __this_cpu_inc(ptcstats.deadlocks);
@@ -339,17 +341,26 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
339 piows = (unsigned long *) pda->pio_write_status_addr; 341 piows = (unsigned long *) pda->pio_write_status_addr;
340 zeroval = pda->pio_write_status_val; 342 zeroval = pda->pio_write_status_val;
341 343
344 i = 0;
345 for_each_node_mask(cnode, nodes) {
346 if (i < ib)
347 goto next;
348
349 if (i > ie)
350 break;
342 351
343 for (i=ib; i <= ie; i++) { 352 nasid = cnodeid_to_nasid(cnode);
344 nasid = nasids[i];
345 if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) 353 if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
346 continue; 354 goto next;
355
347 ptc0 = CHANGE_NASID(nasid, ptc0); 356 ptc0 = CHANGE_NASID(nasid, ptc0);
348 if (ptc1) 357 if (ptc1)
349 ptc1 = CHANGE_NASID(nasid, ptc1); 358 ptc1 = CHANGE_NASID(nasid, ptc1);
350 359
351 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); 360 n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
352 __this_cpu_add(ptcstats.deadlocks2, n); 361 __this_cpu_add(ptcstats.deadlocks2, n);
362next:
363 i++;
353 } 364 }
354 365
355} 366}