diff options
author | Matt Fleming <matt@codeblueprint.co.uk> | 2016-05-04 07:17:48 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2016-05-05 13:29:14 -0400 |
commit | 0b184a30d0df12f8366ce74bb9a5af2cff1fd3e3 (patch) | |
tree | 561e838529d75ea69a7f3dabfc8e7a495d303cc3 /arch/ia64/sn | |
parent | 1bba3ff90842cf55313a64a8a22e6cca0b3fdcb7 (diff) |
ia64: Reduce stack usage by iterating over nodemask
GCC complains about sn2_global_tlb_purge() because of the large stack frame
required by the function:
arch/ia64/sn/kernel/sn2/sn2_smp.c: In function 'sn2_global_tlb_purge':
arch/ia64/sn/kernel/sn2/sn2_smp.c:319:1: warning: the frame size of 2176 bytes is larger than 2048 bytes [-Wframe-larger-than=]
2048 bytes of the stack are consumed by the node ID array 'nasids[]'.
But we don't actually need to put the ID array on the stack: we can
iterate over the nodemask directly and convert each node to its NASID
with cnodeid_to_nasid() as we go.
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 35 |
1 file changed, 23 insertions, 12 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index f9c8d9fc5939..c98dc965fe82 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -54,7 +54,7 @@ sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, | |||
54 | volatile unsigned long *, unsigned long, | 54 | volatile unsigned long *, unsigned long, |
55 | volatile unsigned long *, unsigned long); | 55 | volatile unsigned long *, unsigned long); |
56 | void | 56 | void |
57 | sn2_ptc_deadlock_recovery(short *, short, short, int, | 57 | sn2_ptc_deadlock_recovery(nodemask_t, short, short, int, |
58 | volatile unsigned long *, unsigned long, | 58 | volatile unsigned long *, unsigned long, |
59 | volatile unsigned long *, unsigned long); | 59 | volatile unsigned long *, unsigned long); |
60 | 60 | ||
@@ -169,7 +169,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
169 | int use_cpu_ptcga; | 169 | int use_cpu_ptcga; |
170 | volatile unsigned long *ptc0, *ptc1; | 170 | volatile unsigned long *ptc0, *ptc1; |
171 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; | 171 | unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; |
172 | short nasids[MAX_NUMNODES], nix; | 172 | short nix; |
173 | nodemask_t nodes_flushed; | 173 | nodemask_t nodes_flushed; |
174 | int active, max_active, deadlock, flush_opt = sn2_flush_opt; | 174 | int active, max_active, deadlock, flush_opt = sn2_flush_opt; |
175 | 175 | ||
@@ -218,9 +218,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
218 | } | 218 | } |
219 | 219 | ||
220 | itc = ia64_get_itc(); | 220 | itc = ia64_get_itc(); |
221 | nix = 0; | 221 | nix = nodes_weight(nodes_flushed); |
222 | for_each_node_mask(cnode, nodes_flushed) | ||
223 | nasids[nix++] = cnodeid_to_nasid(cnode); | ||
224 | 222 | ||
225 | rr_value = (mm->context << 3) | REGION_NUMBER(start); | 223 | rr_value = (mm->context << 3) | REGION_NUMBER(start); |
226 | 224 | ||
@@ -270,8 +268,10 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
270 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); | 268 | data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); |
271 | deadlock = 0; | 269 | deadlock = 0; |
272 | active = 0; | 270 | active = 0; |
273 | for (ibegin = 0, i = 0; i < nix; i++) { | 271 | ibegin = 0; |
274 | nasid = nasids[i]; | 272 | i = 0; |
273 | for_each_node_mask(cnode, nodes_flushed) { | ||
274 | nasid = cnodeid_to_nasid(cnode); | ||
275 | if (use_cpu_ptcga && unlikely(nasid == mynasid)) { | 275 | if (use_cpu_ptcga && unlikely(nasid == mynasid)) { |
276 | ia64_ptcga(start, nbits << 2); | 276 | ia64_ptcga(start, nbits << 2); |
277 | ia64_srlz_i(); | 277 | ia64_srlz_i(); |
@@ -286,13 +286,14 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, | |||
286 | if ((deadlock = wait_piowc())) { | 286 | if ((deadlock = wait_piowc())) { |
287 | if (flush_opt == 1) | 287 | if (flush_opt == 1) |
288 | goto done; | 288 | goto done; |
289 | sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); | 289 | sn2_ptc_deadlock_recovery(nodes_flushed, ibegin, i, mynasid, ptc0, data0, ptc1, data1); |
290 | if (reset_max_active_on_deadlock()) | 290 | if (reset_max_active_on_deadlock()) |
291 | max_active = 1; | 291 | max_active = 1; |
292 | } | 292 | } |
293 | active = 0; | 293 | active = 0; |
294 | ibegin = i + 1; | 294 | ibegin = i + 1; |
295 | } | 295 | } |
296 | i++; | ||
296 | } | 297 | } |
297 | start += (1UL << nbits); | 298 | start += (1UL << nbits); |
298 | } while (start < end); | 299 | } while (start < end); |
@@ -327,11 +328,12 @@ done: | |||
327 | */ | 328 | */ |
328 | 329 | ||
329 | void | 330 | void |
330 | sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, | 331 | sn2_ptc_deadlock_recovery(nodemask_t nodes, short ib, short ie, int mynasid, |
331 | volatile unsigned long *ptc0, unsigned long data0, | 332 | volatile unsigned long *ptc0, unsigned long data0, |
332 | volatile unsigned long *ptc1, unsigned long data1) | 333 | volatile unsigned long *ptc1, unsigned long data1) |
333 | { | 334 | { |
334 | short nasid, i; | 335 | short nasid, i; |
336 | int cnode; | ||
335 | unsigned long *piows, zeroval, n; | 337 | unsigned long *piows, zeroval, n; |
336 | 338 | ||
337 | __this_cpu_inc(ptcstats.deadlocks); | 339 | __this_cpu_inc(ptcstats.deadlocks); |
@@ -339,17 +341,26 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, | |||
339 | piows = (unsigned long *) pda->pio_write_status_addr; | 341 | piows = (unsigned long *) pda->pio_write_status_addr; |
340 | zeroval = pda->pio_write_status_val; | 342 | zeroval = pda->pio_write_status_val; |
341 | 343 | ||
344 | i = 0; | ||
345 | for_each_node_mask(cnode, nodes) { | ||
346 | if (i < ib) | ||
347 | goto next; | ||
348 | |||
349 | if (i > ie) | ||
350 | break; | ||
342 | 351 | ||
343 | for (i=ib; i <= ie; i++) { | 352 | nasid = cnodeid_to_nasid(cnode); |
344 | nasid = nasids[i]; | ||
345 | if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) | 353 | if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid) |
346 | continue; | 354 | goto next; |
355 | |||
347 | ptc0 = CHANGE_NASID(nasid, ptc0); | 356 | ptc0 = CHANGE_NASID(nasid, ptc0); |
348 | if (ptc1) | 357 | if (ptc1) |
349 | ptc1 = CHANGE_NASID(nasid, ptc1); | 358 | ptc1 = CHANGE_NASID(nasid, ptc1); |
350 | 359 | ||
351 | n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); | 360 | n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); |
352 | __this_cpu_add(ptcstats.deadlocks2, n); | 361 | __this_cpu_add(ptcstats.deadlocks2, n); |
362 | next: | ||
363 | i++; | ||
353 | } | 364 | } |
354 | 365 | ||
355 | } | 366 | } |