diff options
author | Chen, Kenneth W <kenneth.w.chen@intel.com> | 2006-10-13 13:05:45 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-02-06 18:04:18 -0500 |
commit | a0776ec8e97bf109e7d973d09fc3e1814eb32bfb (patch) | |
tree | 0c247bdd764fafc19390904d85acd8ef6a065595 /arch | |
parent | 62d0cfcb27cf755cebdc93ca95dabc83608007cd (diff) |
[IA64] remove per-cpu ia64_phys_stacked_size_p8
It's not efficient to use a per-cpu variable just to store
how many physical stacked registers a cpu has. Ever since the
inception of ia64, up to the upcoming Montecito processor, that
value has been "glued" to 96. Having a variable in memory means
that the kernel is burning an extra cacheline access on every
syscall and kernel exit path. Such a "static" value is better
served by the instruction patching utility that exists today.
Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
This also has a pleasant side effect of eliminating access to
per-cpu area while psr.ic=0 in the kernel exit path. (fixable
for per-cpu DTC work, but why bother?)
There may be some concern about the default value that the instruc-
tion has encoded in the kernel image. It should not be a concern.
The reasons are:
(1) cpu_init() is called at CPU initialization. In there, we
find out the physical stack register size from PAL and patch
two instructions in the kernel exit code. The code in question
cannot be executed before the patching is done.
(2) The current implementation stores zero in ia64_phys_stacked_size_p8,
and that is the value the current kernel exit path loads.
With the new code, it is as if we had stored reg size 96
in ia64_phys_stacked_size_p8, thus creating a better safety net.
Given (1) above can never fail, having (2) is just a bonus.
All in all, this patch allows one less memory reference in the kernel
exit path, thus reducing syscall and interrupt return latency, and
avoids displacing potentially useful data in the CPU cache.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/ia64/kernel/entry.S | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/patch.c | 20 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/vmlinux.lds.S | 7 |
4 files changed, 34 insertions, 7 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 15234ed3a341..ac4b304bea30 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall) | |||
767 | ld8.fill r15=[r3] // M0|1 restore r15 | 767 | ld8.fill r15=[r3] // M0|1 restore r15 |
768 | mov b6=r18 // I0 restore b6 | 768 | mov b6=r18 // I0 restore b6 |
769 | 769 | ||
770 | addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A | 770 | LOAD_PHYS_STACK_REG_SIZE(r17) |
771 | mov f9=f0 // F clear f9 | 771 | mov f9=f0 // F clear f9 |
772 | (pKStk) br.cond.dpnt.many skip_rbs_switch // B | 772 | (pKStk) br.cond.dpnt.many skip_rbs_switch // B |
773 | 773 | ||
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall) | |||
775 | shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition | 775 | shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition |
776 | cover // B add current frame into dirty partition & set cr.ifs | 776 | cover // B add current frame into dirty partition & set cr.ifs |
777 | ;; | 777 | ;; |
778 | (pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 | ||
779 | mov r19=ar.bsp // M2 get new backing store pointer | 778 | mov r19=ar.bsp // M2 get new backing store pointer |
780 | mov f10=f0 // F clear f10 | 779 | mov f10=f0 // F clear f10 |
781 | 780 | ||
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) | |||
953 | shr.u r18=r19,16 // get byte size of existing "dirty" partition | 952 | shr.u r18=r19,16 // get byte size of existing "dirty" partition |
954 | ;; | 953 | ;; |
955 | mov r16=ar.bsp // get existing backing store pointer | 954 | mov r16=ar.bsp // get existing backing store pointer |
956 | addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 | 955 | LOAD_PHYS_STACK_REG_SIZE(r17) |
957 | ;; | ||
958 | ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 | ||
959 | (pKStk) br.cond.dpnt skip_rbs_switch | 956 | (pKStk) br.cond.dpnt skip_rbs_switch |
960 | 957 | ||
961 | /* | 958 | /* |
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index bc11bb096f58..e796e29f8e15 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c | |||
@@ -195,3 +195,23 @@ ia64_patch_gate (void) | |||
195 | ia64_patch_vtop(START(vtop), END(vtop)); | 195 | ia64_patch_vtop(START(vtop), END(vtop)); |
196 | ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); | 196 | ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); |
197 | } | 197 | } |
198 | |||
199 | void ia64_patch_phys_stack_reg(unsigned long val) | ||
200 | { | ||
201 | s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; | ||
202 | s32 * end = (s32 *) __end___phys_stack_reg_patchlist; | ||
203 | u64 ip, mask, imm; | ||
204 | |||
205 | /* see instruction format A4: adds r1 = imm13, r3 */ | ||
206 | mask = (0x3fUL << 27) | (0x7f << 13); | ||
207 | imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; | ||
208 | |||
209 | while (offp < end) { | ||
210 | ip = (u64) offp + *offp; | ||
211 | ia64_patch(ip, mask, imm); | ||
212 | ia64_fc(ip); | ||
213 | ++offp; | ||
214 | } | ||
215 | ia64_sync_i(); | ||
216 | ia64_srlz_i(); | ||
217 | } | ||
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ad567b8d432e..f167b89f24eb 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c | |||
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void); | |||
75 | 75 | ||
76 | DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); | 76 | DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); |
77 | DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); | 77 | DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); |
78 | DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); | ||
79 | unsigned long ia64_cycles_per_usec; | 78 | unsigned long ia64_cycles_per_usec; |
80 | struct ia64_boot_param *ia64_boot_param; | 79 | struct ia64_boot_param *ia64_boot_param; |
81 | struct screen_info screen_info; | 80 | struct screen_info screen_info; |
@@ -836,6 +835,7 @@ void __cpuinit | |||
836 | cpu_init (void) | 835 | cpu_init (void) |
837 | { | 836 | { |
838 | extern void __cpuinit ia64_mmu_init (void *); | 837 | extern void __cpuinit ia64_mmu_init (void *); |
838 | static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; | ||
839 | unsigned long num_phys_stacked; | 839 | unsigned long num_phys_stacked; |
840 | pal_vm_info_2_u_t vmi; | 840 | pal_vm_info_2_u_t vmi; |
841 | unsigned int max_ctx; | 841 | unsigned int max_ctx; |
@@ -949,7 +949,10 @@ cpu_init (void) | |||
949 | num_phys_stacked = 96; | 949 | num_phys_stacked = 96; |
950 | } | 950 | } |
951 | /* size of physical stacked register partition plus 8 bytes: */ | 951 | /* size of physical stacked register partition plus 8 bytes: */ |
952 | __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; | 952 | if (num_phys_stacked > max_num_phys_stacked) { |
953 | ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); | ||
954 | max_num_phys_stacked = num_phys_stacked; | ||
955 | } | ||
953 | platform_cpu_init(); | 956 | platform_cpu_init(); |
954 | pm_idle = default_idle; | 957 | pm_idle = default_idle; |
955 | } | 958 | } |
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index d6083a0936f4..d9599dcac787 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S | |||
@@ -78,6 +78,13 @@ SECTIONS | |||
78 | __stop___mca_table = .; | 78 | __stop___mca_table = .; |
79 | } | 79 | } |
80 | 80 | ||
81 | .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET) | ||
82 | { | ||
83 | __start___phys_stack_reg_patchlist = .; | ||
84 | *(.data.patch.phys_stack_reg) | ||
85 | __end___phys_stack_reg_patchlist = .; | ||
86 | } | ||
87 | |||
81 | /* Global data */ | 88 | /* Global data */ |
82 | _data = .; | 89 | _data = .; |
83 | 90 | ||