aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-10-13 13:05:45 -0400
committerTony Luck <tony.luck@intel.com>2007-02-06 18:04:18 -0500
commita0776ec8e97bf109e7d973d09fc3e1814eb32bfb (patch)
tree0c247bdd764fafc19390904d85acd8ef6a065595
parent62d0cfcb27cf755cebdc93ca95dabc83608007cd (diff)
[IA64] remove per-cpu ia64_phys_stacked_size_p8
It's not efficient to use a per-cpu variable just to store how many physical stack register a cpu has. Ever since the incarnation of ia64 up till upcoming Montecito processor, that variable has "glued" to 96. Having a variable in memory means that the kernel is burning an extra cacheline access on every syscall and kernel exit path. Such "static" value is better served with the instruction patching utility exists today. Convert ia64_phys_stacked_size_p8 into dynamic insn patching. This also has a pleasant side effect of eliminating access to per-cpu area while psr.ic=0 in the kernel exit path. (fixable for per-cpu DTC work, but why bother?) There are some concerns with the default value that the instruc- tion encoded in the kernel image. It shouldn't be concerned. The reasons are: (1) cpu_init() is called at CPU initialization. In there, we find out physical stack register size from PAL and patch two instructions in kernel exit code. The code in question can not be executed before the patching is done. (2) current implementation stores zero in ia64_phys_stacked_size_p8, and that's what the current kernel exit path loads the value with. With the new code, it is equivalent that we store reg size 96 in ia64_phys_stacked_size_p8, thus creating a better safety net. Given (1) above can never fail, having (2) is just a bonus. All in all, this patch allow one less memory reference in the kernel exit path, thus reducing syscall and interrupt return latency; and avoid polluting potential useful data in the CPU cache. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/kernel/entry.S7
-rw-r--r--arch/ia64/kernel/patch.c20
-rw-r--r--arch/ia64/kernel/setup.c7
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S7
-rw-r--r--include/asm-ia64/asmmacro.h10
-rw-r--r--include/asm-ia64/patch.h1
-rw-r--r--include/asm-ia64/processor.h1
-rw-r--r--include/asm-ia64/sections.h1
8 files changed, 47 insertions, 7 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 15234ed3a341..ac4b304bea30 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
767 ld8.fill r15=[r3] // M0|1 restore r15 767 ld8.fill r15=[r3] // M0|1 restore r15
768 mov b6=r18 // I0 restore b6 768 mov b6=r18 // I0 restore b6
769 769
770 addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A 770 LOAD_PHYS_STACK_REG_SIZE(r17)
771 mov f9=f0 // F clear f9 771 mov f9=f0 // F clear f9
772(pKStk) br.cond.dpnt.many skip_rbs_switch // B 772(pKStk) br.cond.dpnt.many skip_rbs_switch // B
773 773
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
775 shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition 775 shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
776 cover // B add current frame into dirty partition & set cr.ifs 776 cover // B add current frame into dirty partition & set cr.ifs
777 ;; 777 ;;
778(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
779 mov r19=ar.bsp // M2 get new backing store pointer 778 mov r19=ar.bsp // M2 get new backing store pointer
780 mov f10=f0 // F clear f10 779 mov f10=f0 // F clear f10
781 780
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
953 shr.u r18=r19,16 // get byte size of existing "dirty" partition 952 shr.u r18=r19,16 // get byte size of existing "dirty" partition
954 ;; 953 ;;
955 mov r16=ar.bsp // get existing backing store pointer 954 mov r16=ar.bsp // get existing backing store pointer
956 addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 955 LOAD_PHYS_STACK_REG_SIZE(r17)
957 ;;
958 ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
959(pKStk) br.cond.dpnt skip_rbs_switch 956(pKStk) br.cond.dpnt skip_rbs_switch
960 957
961 /* 958 /*
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index bc11bb096f58..e796e29f8e15 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
195 ia64_patch_vtop(START(vtop), END(vtop)); 195 ia64_patch_vtop(START(vtop), END(vtop));
196 ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); 196 ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
197} 197}
198
199void ia64_patch_phys_stack_reg(unsigned long val)
200{
201 s32 * offp = (s32 *) __start___phys_stack_reg_patchlist;
202 s32 * end = (s32 *) __end___phys_stack_reg_patchlist;
203 u64 ip, mask, imm;
204
205 /* see instruction format A4: adds r1 = imm13, r3 */
206 mask = (0x3fUL << 27) | (0x7f << 13);
207 imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13;
208
209 while (offp < end) {
210 ip = (u64) offp + *offp;
211 ia64_patch(ip, mask, imm);
212 ia64_fc(ip);
213 ++offp;
214 }
215 ia64_sync_i();
216 ia64_srlz_i();
217}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ad567b8d432e..f167b89f24eb 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void);
75 75
76DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); 76DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
77DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); 77DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
78DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
79unsigned long ia64_cycles_per_usec; 78unsigned long ia64_cycles_per_usec;
80struct ia64_boot_param *ia64_boot_param; 79struct ia64_boot_param *ia64_boot_param;
81struct screen_info screen_info; 80struct screen_info screen_info;
@@ -836,6 +835,7 @@ void __cpuinit
836cpu_init (void) 835cpu_init (void)
837{ 836{
838 extern void __cpuinit ia64_mmu_init (void *); 837 extern void __cpuinit ia64_mmu_init (void *);
838 static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
839 unsigned long num_phys_stacked; 839 unsigned long num_phys_stacked;
840 pal_vm_info_2_u_t vmi; 840 pal_vm_info_2_u_t vmi;
841 unsigned int max_ctx; 841 unsigned int max_ctx;
@@ -949,7 +949,10 @@ cpu_init (void)
949 num_phys_stacked = 96; 949 num_phys_stacked = 96;
950 } 950 }
951 /* size of physical stacked register partition plus 8 bytes: */ 951 /* size of physical stacked register partition plus 8 bytes: */
952 __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; 952 if (num_phys_stacked > max_num_phys_stacked) {
953 ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
954 max_num_phys_stacked = num_phys_stacked;
955 }
953 platform_cpu_init(); 956 platform_cpu_init();
954 pm_idle = default_idle; 957 pm_idle = default_idle;
955} 958}
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index d6083a0936f4..d9599dcac787 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -78,6 +78,13 @@ SECTIONS
78 __stop___mca_table = .; 78 __stop___mca_table = .;
79 } 79 }
80 80
81 .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
82 {
83 __start___phys_stack_reg_patchlist = .;
84 *(.data.patch.phys_stack_reg)
85 __end___phys_stack_reg_patchlist = .;
86 }
87
81 /* Global data */ 88 /* Global data */
82 _data = .; 89 _data = .;
83 90
diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h
index c22b4658fc61..c1642fd64029 100644
--- a/include/asm-ia64/asmmacro.h
+++ b/include/asm-ia64/asmmacro.h
@@ -104,6 +104,16 @@ name:
104#endif 104#endif
105 105
106/* 106/*
107 * If physical stack register size is different from DEF_NUM_STACK_REG,
108 * dynamically patch the kernel for correct size.
109 */
110 .section ".data.patch.phys_stack_reg", "a"
111 .previous
112#define LOAD_PHYS_STACK_REG_SIZE(reg) \
113[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \
114 .xdata4 ".data.patch.phys_stack_reg", 1b-.
115
116/*
107 * Up until early 2004, use of .align within a function caused bad unwind info. 117 * Up until early 2004, use of .align within a function caused bad unwind info.
108 * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing 118 * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
109 * otherwise. 119 * otherwise.
diff --git a/include/asm-ia64/patch.h b/include/asm-ia64/patch.h
index 4797f3535e6d..a71543084fb4 100644
--- a/include/asm-ia64/patch.h
+++ b/include/asm-ia64/patch.h
@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel
20 20
21extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end); 21extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
22extern void ia64_patch_vtop (unsigned long start, unsigned long end); 22extern void ia64_patch_vtop (unsigned long start, unsigned long end);
23extern void ia64_patch_phys_stack_reg(unsigned long val);
23extern void ia64_patch_gate (void); 24extern void ia64_patch_gate (void);
24 25
25#endif /* _ASM_IA64_PATCH_H */ 26#endif /* _ASM_IA64_PATCH_H */
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 5830d36fd8e6..88c728b9ff45 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -19,6 +19,7 @@
19#include <asm/ptrace.h> 19#include <asm/ptrace.h>
20#include <asm/ustack.h> 20#include <asm/ustack.h>
21 21
22#define IA64_NUM_PHYS_STACK_REG 96
22#define IA64_NUM_DBG_REGS 8 23#define IA64_NUM_DBG_REGS 8
23 24
24#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) 25#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000)
diff --git a/include/asm-ia64/sections.h b/include/asm-ia64/sections.h
index e9eb7f62d32b..dc42a359894f 100644
--- a/include/asm-ia64/sections.h
+++ b/include/asm-ia64/sections.h
@@ -11,6 +11,7 @@
11extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; 11extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
12extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; 12extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
13extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; 13extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
14extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
14extern char __start_gate_section[]; 15extern char __start_gate_section[];
15extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; 16extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
16extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; 17extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];