author	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>	2013-05-16 05:32:09 -0400
committer	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>	2013-12-16 12:17:30 -0500
commit	976d7d3f79a997b223f2ed8eabef7e12e469b5cf (patch)
tree	b9164e6850160c69e4d692b45b5d389048111fb3 /arch/arm64/kernel/setup.c
parent	b058450f38c337d12a6d1a1bcaa8610859055e2e (diff)
arm64: kernel: build MPIDR_EL1 hash function data structure
On ARM64 SMP systems, cores are identified by their MPIDR_EL1 register.
The MPIDR_EL1 guidelines in the ARM ARM do not provide strict enforcement
of the MPIDR_EL1 layout, only recommendations that, if followed, split the
MPIDR_EL1 on ARM 64-bit platforms into four affinity levels. In
multi-cluster systems like big.LITTLE, if the affinity guidelines are
followed, the MPIDR_EL1 cannot be considered a linear index. This means
that the association between a logical CPU in the kernel and the HW CPU
identifier becomes somewhat more complicated, requiring methods like
hashing to associate a given MPIDR_EL1 with a CPU logical index, in order
for the look-up to be carried out in an efficient and scalable way.

This patch provides a function in the kernel that, starting from the
cpu_logical_map, implements collision-free hashing of MPIDR_EL1 values by
checking all significant bits of the MPIDR_EL1 affinity level bitfields.
The hashing can then be carried out through bit shifting and ORing; the
resulting hash algorithm is a collision-free though not minimal hash that
can be executed with few assembly instructions.

The MPIDR_EL1 is filtered through an MPIDR mask that is built by checking
all bits that toggle in the set of MPIDR_EL1s corresponding to possible
CPUs. Bits that do not toggle do not carry information, so they do not
contribute to the resulting hash.

Pseudo code:

/* check all bits that toggle, so they are required */
for (i = 1, mpidr_el1_mask = 0; i < num_possible_cpus(); i++)
	mpidr_el1_mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));

/*
 * Build shifts to be applied to aff0, aff1, aff2, aff3 values to hash the
 * mpidr_el1
 * fls() returns the last bit set in a word, 0 if none
 * ffs() returns the first bit set in a word, 0 if none
 */
fs0 = mpidr_el1_mask[7:0] ? ffs(mpidr_el1_mask[7:0]) - 1 : 0;
fs1 = mpidr_el1_mask[15:8] ? ffs(mpidr_el1_mask[15:8]) - 1 : 0;
fs2 = mpidr_el1_mask[23:16] ? ffs(mpidr_el1_mask[23:16]) - 1 : 0;
fs3 = mpidr_el1_mask[39:32] ? ffs(mpidr_el1_mask[39:32]) - 1 : 0;
ls0 = fls(mpidr_el1_mask[7:0]);
ls1 = fls(mpidr_el1_mask[15:8]);
ls2 = fls(mpidr_el1_mask[23:16]);
ls3 = fls(mpidr_el1_mask[39:32]);
bits0 = ls0 - fs0;
bits1 = ls1 - fs1;
bits2 = ls2 - fs2;
bits3 = ls3 - fs3;
aff0_shift = fs0;
aff1_shift = 8 + fs1 - bits0;
aff2_shift = 16 + fs2 - (bits0 + bits1);
aff3_shift = 32 + fs3 - (bits0 + bits1 + bits2);

u32 hash(u64 mpidr_el1) {
	u64 l[4];
	u64 mpidr_el1_masked = mpidr_el1 & mpidr_el1_mask;
	l[0] = mpidr_el1_masked & 0xff;
	l[1] = mpidr_el1_masked & 0xff00;
	l[2] = mpidr_el1_masked & 0xff0000;
	l[3] = mpidr_el1_masked & 0xff00000000;
	return (l[0] >> aff0_shift | l[1] >> aff1_shift |
		l[2] >> aff2_shift | l[3] >> aff3_shift);
}

The hashing algorithm relies on the inherent properties set in the ARM ARM
recommendations for the MPIDR_EL1. Exotic configurations, where for
instance the MPIDR_EL1 values at a given affinity level have large holes,
can end up requiring big hash tables, since the compression of values that
can be achieved through shifting is somewhat crippled when holes are
present. The kernel warns if the number of buckets of the resulting hash
table exceeds the number of possible CPUs by a factor of 4, which is a
symptom of a very sparse HW MPIDR_EL1 configuration.

The hash algorithm is quite simple and can easily be implemented in
assembly code, to be used in code paths where the kernel virtual address
space is not set up (i.e. cpu_resume) and instruction and data fetches are
strongly ordered, so code must be compact and must carry out few data
accesses.
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
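
A minimal standalone C sketch of the precomputation and hash described
above is given below; it is illustrative only. The cpu_map[] values model
a hypothetical two-cluster, four-CPU system, the AFF_SHIFT()/AFF_LEVEL()
macros are local stand-ins for the kernel's MPIDR_LEVEL_SHIFT()/
MPIDR_AFFINITY_LEVEL(), and GCC builtins replace the kernel's ffs()/fls():

#include <stdio.h>
#include <stdint.h>

/* affinity level 3 lives in MPIDR_EL1[39:32], levels 0-2 in [23:0] */
#define AFF_SHIFT(level)	((level) < 3 ? (level) * 8 : 32)
#define AFF_LEVEL(mpidr, level)	(((mpidr) >> AFF_SHIFT(level)) & 0xff)

int main(void)
{
	/* hypothetical cpu_logical_map(): two clusters with two CPUs each */
	uint64_t cpu_map[] = { 0x000000, 0x000001, 0x000100, 0x000101 };
	unsigned int ncpus = sizeof(cpu_map) / sizeof(cpu_map[0]);
	unsigned int fs[4], bits[4], shift[4], i, level;
	uint64_t mask = 0;

	/* bits that never toggle carry no information: filter them out */
	for (i = 1; i < ncpus; i++)
		mask |= cpu_map[i] ^ cpu_map[0];

	/* per-level first/last set bit -> number of significant bits */
	for (level = 0; level < 4; level++) {
		uint32_t aff = AFF_LEVEL(mask, level);
		unsigned int ls = aff ? 32 - __builtin_clz(aff) : 0; /* fls() */

		fs[level] = aff ? __builtin_ffs(aff) - 1 : 0;
		bits[level] = ls - fs[level];
	}

	/* shifts that pack the significant bits of each level contiguously */
	shift[0] = AFF_SHIFT(0) + fs[0];
	shift[1] = AFF_SHIFT(1) + fs[1] - bits[0];
	shift[2] = AFF_SHIFT(2) + fs[2] - (bits[0] + bits[1]);
	shift[3] = AFF_SHIFT(3) + fs[3] - (bits[0] + bits[1] + bits[2]);

	/* hash each MPIDR into a compact linear index */
	for (i = 0; i < ncpus; i++) {
		uint64_t m = cpu_map[i] & mask;
		uint32_t hash = (uint32_t)((m & 0xff) >> shift[0] |
					   (m & 0xff00) >> shift[1] |
					   (m & 0xff0000) >> shift[2] |
					   (m & 0xff00000000ULL) >> shift[3]);

		printf("mpidr %#llx -> index %u\n",
		       (unsigned long long)m, hash);
	}
	return 0;
}

With these sample values the mask is 0x101 and the computed shifts map the
four MPIDR values to indices 0-3 with no collisions.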
Diffstat (limited to 'arch/arm64/kernel/setup.c')
-rw-r--r--	arch/arm64/kernel/setup.c	|	70
1 file changed, 70 insertions(+), 0 deletions(-)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index bd9bbd0e44ed..87ddfce35cb5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -113,6 +113,75 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 	return phys_id == cpu_logical_map(cpu);
 }
 
+struct mpidr_hash mpidr_hash;
+#ifdef CONFIG_SMP
+/**
+ * smp_build_mpidr_hash - Pre-compute shifts required at each affinity
+ *			  level in order to build a linear index from an
+ *			  MPIDR value. Resulting algorithm is a collision
+ *			  free hash carried out through shifting and ORing
+ */
+static void __init smp_build_mpidr_hash(void)
+{
+	u32 i, affinity, fs[4], bits[4], ls;
+	u64 mask = 0;
+	/*
+	 * Pre-scan the list of MPIDRS and filter out bits that do
+	 * not contribute to affinity levels, ie they never toggle.
+	 */
+	for_each_possible_cpu(i)
+		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0));
+	pr_debug("mask of set bits %#llx\n", mask);
+	/*
+	 * Find and stash the last and first bit set at all affinity levels to
+	 * check how many bits are required to represent them.
+	 */
+	for (i = 0; i < 4; i++) {
+		affinity = MPIDR_AFFINITY_LEVEL(mask, i);
+		/*
+		 * Find the MSB bit and LSB bits position
+		 * to determine how many bits are required
+		 * to express the affinity level.
+		 */
+		ls = fls(affinity);
+		fs[i] = affinity ? ffs(affinity) - 1 : 0;
+		bits[i] = ls - fs[i];
+	}
+	/*
+	 * An index can be created from the MPIDR_EL1 by isolating the
+	 * significant bits at each affinity level and by shifting
+	 * them in order to compress the 32 bits values space to a
+	 * compressed set of values. This is equivalent to hashing
+	 * the MPIDR_EL1 through shifting and ORing. It is a collision free
+	 * hash though not minimal since some levels might contain a number
+	 * of CPUs that is not an exact power of 2 and their bit
+	 * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}.
+	 */
+	mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0];
+	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0];
+	mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] -
+				  (bits[1] + bits[0]);
+	mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) +
+				  fs[3] - (bits[2] + bits[1] + bits[0]);
+	mpidr_hash.mask = mask;
+	mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0];
+	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n",
+		mpidr_hash.shift_aff[0],
+		mpidr_hash.shift_aff[1],
+		mpidr_hash.shift_aff[2],
+		mpidr_hash.shift_aff[3],
+		mpidr_hash.mask,
+		mpidr_hash.bits);
+	/*
+	 * 4x is an arbitrary value used to warn on a hash table much bigger
+	 * than expected on most systems.
+	 */
+	if (mpidr_hash_size() > 4 * num_possible_cpus())
+		pr_warn("Large number of MPIDR hash buckets detected\n");
+	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
+}
+#endif
+
 static void __init setup_processor(void)
 {
 	struct cpu_info *cpu_info;
@@ -236,6 +305,7 @@ void __init setup_arch(char **cmdline_p)
 	cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
 	smp_init_cpus();
+	smp_build_mpidr_hash();
 #endif
 
 #ifdef CONFIG_VT
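
For context, a hedged sketch of the consumer side follows, showing how the
fields filled in by smp_build_mpidr_hash() can be used to turn a live
MPIDR_EL1 value into a bucket index. The struct and function names below
are made up for illustration and do not come from the patch, and
mpidr_hash_size() is assumed to evaluate to 1 << mpidr_hash.bits:

#include <stdint.h>

/* mirrors the fields of the kernel's struct mpidr_hash, for illustration */
struct mpidr_hash_sketch {
	uint64_t mask;
	uint32_t shift_aff[4];
	uint32_t bits;
};

/* hash a raw MPIDR_EL1 value into a linear index in [0, 1 << bits) */
uint32_t mpidr_to_index(const struct mpidr_hash_sketch *h, uint64_t mpidr)
{
	uint64_t m = mpidr & h->mask;	/* drop bits that never toggle */

	return (uint32_t)((m & 0xff) >> h->shift_aff[0] |
			  (m & 0xff00) >> h->shift_aff[1] |
			  (m & 0xff0000) >> h->shift_aff[2] |
			  (m & 0xff00000000ULL) >> h->shift_aff[3]);
}

The table indexed by the result needs 1 << bits entries, which is what the
4 * num_possible_cpus() check in the hunk above is measuring: for a sparse
aff0 layout such as the {0x2, 0x80} example mentioned in the code comment,
fls() - ffs() leaves 7 significant bits, so a two-CPU system would already
need a 128-entry table and trip the warning.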