aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/kernel/topology.c
diff options
context:
space:
mode:
authorVincent Guittot <vincent.guittot@linaro.org>2012-07-10 09:13:12 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2012-07-12 15:38:12 -0400
commit339ca09d7adac80eda8d097ab473c6c23ee86b17 (patch)
tree6ba3ddf0732b8ffe368edac9f062cb628b8636d6 /arch/arm/kernel/topology.c
parentcb75dacb39494164e6b1f7aa747fb639bf18584c (diff)
ARM: 7463/1: topology: Update cpu_power according to DT information
Use the cpu compatibility field and the clock-frequency field of the DT to estimate the capacity of each core of the system and to update the cpu_power field accordingly. This patch makes it possible to put more running tasks on big cores than on LITTLE ones, but it doesn't ensure that long-running tasks will run on big cores and short ones on LITTLE cores. Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Reviewed-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch/arm/kernel/topology.c')
-rw-r--r--arch/arm/kernel/topology.c153
1 files changed, 153 insertions, 0 deletions
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index eb5fc8132c02..198b08456e90 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -17,7 +17,9 @@
17#include <linux/percpu.h> 17#include <linux/percpu.h>
18#include <linux/node.h> 18#include <linux/node.h>
19#include <linux/nodemask.h> 19#include <linux/nodemask.h>
20#include <linux/of.h>
20#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/slab.h>
21 23
22#include <asm/cputype.h> 24#include <asm/cputype.h>
23#include <asm/topology.h> 25#include <asm/topology.h>
@@ -49,6 +51,152 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
49 per_cpu(cpu_scale, cpu) = power; 51 per_cpu(cpu_scale, cpu) = power;
50} 52}
51 53
54#ifdef CONFIG_OF
/*
 * One entry of the efficiency table: pairs a DT "compatible" string
 * with the relative efficiency weight of that processor type.
 */
struct cpu_efficiency {
	/* string tested against a cpu node with of_device_is_compatible() */
	const char *compatible;
	/* weight multiplied by the scaled clock rate to get a capacity */
	unsigned long efficiency;
};
59
60/*
61 * Table of relative efficiency of each processors
62 * The efficiency value must fit in 20bit and the final
63 * cpu_scale value must be in the range
64 * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
65 * in order to return at most 1 when DIV_ROUND_CLOSEST
66 * is used to compute the capacity of a CPU.
67 * Processors that are not defined in the table,
68 * use the default SCHED_POWER_SCALE value for cpu_scale.
69 */
70struct cpu_efficiency table_efficiency[] = {
71 {"arm,cortex-a15", 3891},
72 {"arm,cortex-a7", 2048},
73 {NULL, },
74};
75
/*
 * Per-CPU record filled in from the device tree: the hardware id read
 * from the node's "reg" property and the raw capacity computed for it.
 */
struct cpu_capacity {
	unsigned long hwid;
	unsigned long capacity;
};

/*
 * Table of parsed CPUs, allocated by parse_dt_topology(). An entry whose
 * hwid is (unsigned long)(-1) marks the end of the valid entries.
 */
struct cpu_capacity *cpu_capacity;

/*
 * Divisor applied to a CPU's raw capacity when its power scale is set.
 * Stays at 1 unless parse_dt_topology() computes another value.
 */
unsigned long middle_capacity = 1;
84
85/*
86 * Iterate all CPUs' descriptor in DT and compute the efficiency
87 * (as per table_efficiency). Also calculate a middle efficiency
88 * as close as possible to (max{eff_i} - min{eff_i}) / 2
89 * This is later used to scale the cpu_power field such that an
90 * 'average' CPU is of middle power. Also see the comments near
91 * table_efficiency[] and update_cpu_power().
92 */
93static void __init parse_dt_topology(void)
94{
95 struct cpu_efficiency *cpu_eff;
96 struct device_node *cn = NULL;
97 unsigned long min_capacity = (unsigned long)(-1);
98 unsigned long max_capacity = 0;
99 unsigned long capacity = 0;
100 int alloc_size, cpu = 0;
101
102 alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity);
103 cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT);
104
105 while ((cn = of_find_node_by_type(cn, "cpu"))) {
106 const u32 *rate, *reg;
107 int len;
108
109 if (cpu >= num_possible_cpus())
110 break;
111
112 for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
113 if (of_device_is_compatible(cn, cpu_eff->compatible))
114 break;
115
116 if (cpu_eff->compatible == NULL)
117 continue;
118
119 rate = of_get_property(cn, "clock-frequency", &len);
120 if (!rate || len != 4) {
121 pr_err("%s missing clock-frequency property\n",
122 cn->full_name);
123 continue;
124 }
125
126 reg = of_get_property(cn, "reg", &len);
127 if (!reg || len != 4) {
128 pr_err("%s missing reg property\n", cn->full_name);
129 continue;
130 }
131
132 capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
133
134 /* Save min capacity of the system */
135 if (capacity < min_capacity)
136 min_capacity = capacity;
137
138 /* Save max capacity of the system */
139 if (capacity > max_capacity)
140 max_capacity = capacity;
141
142 cpu_capacity[cpu].capacity = capacity;
143 cpu_capacity[cpu++].hwid = be32_to_cpup(reg);
144 }
145
146 if (cpu < num_possible_cpus())
147 cpu_capacity[cpu].hwid = (unsigned long)(-1);
148
149 /* If min and max capacities are equals, we bypass the update of the
150 * cpu_scale because all CPUs have the same capacity. Otherwise, we
151 * compute a middle_capacity factor that will ensure that the capacity
152 * of an 'average' CPU of the system will be as close as possible to
153 * SCHED_POWER_SCALE, which is the default value, but with the
154 * constraint explained near table_efficiency[].
155 */
156 if (min_capacity == max_capacity)
157 cpu_capacity[0].hwid = (unsigned long)(-1);
158 else if (4*max_capacity < (3*(max_capacity + min_capacity)))
159 middle_capacity = (min_capacity + max_capacity)
160 >> (SCHED_POWER_SHIFT+1);
161 else
162 middle_capacity = ((max_capacity / 3)
163 >> (SCHED_POWER_SHIFT-1)) + 1;
164
165}
166
167/*
168 * Look for a customed capacity of a CPU in the cpu_capacity table during the
169 * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
170 * function returns directly for SMP system.
171 */
172void update_cpu_power(unsigned int cpu, unsigned long hwid)
173{
174 unsigned int idx = 0;
175
176 /* look for the cpu's hwid in the cpu capacity table */
177 for (idx = 0; idx < num_possible_cpus(); idx++) {
178 if (cpu_capacity[idx].hwid == hwid)
179 break;
180
181 if (cpu_capacity[idx].hwid == -1)
182 return;
183 }
184
185 if (idx == num_possible_cpus())
186 return;
187
188 set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity);
189
190 printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
191 cpu, arch_scale_freq_power(NULL, cpu));
192}
193
194#else
/*
 * Without CONFIG_OF there is no device tree to parse, so both hooks are
 * empty. The prototypes mirror the CONFIG_OF versions so that callers
 * compile the same way in either configuration.
 */
static inline void parse_dt_topology(void) {}
static inline void update_cpu_power(unsigned int cpu, unsigned long hwid) {}
197#endif
198
199
52/* 200/*
53 * cpu topology management 201 * cpu topology management
54 */ 202 */
@@ -62,6 +210,7 @@ static void set_power_scale(unsigned int cpu, unsigned long power)
62 * These masks reflect the current use of the affinity levels. 210 * These masks reflect the current use of the affinity levels.
63 * The affinity level can be up to 16 bits according to ARM ARM 211 * The affinity level can be up to 16 bits according to ARM ARM
64 */ 212 */
/* Keeps the low 24 bits of MPIDR; the result is the hwid given to update_cpu_power() */
#define MPIDR_HWID_BITMASK 0xFFFFFF
65 214
66#define MPIDR_LEVEL0_MASK 0x3 215#define MPIDR_LEVEL0_MASK 0x3
67#define MPIDR_LEVEL0_SHIFT 0 216#define MPIDR_LEVEL0_SHIFT 0
@@ -160,6 +309,8 @@ void store_cpu_topology(unsigned int cpuid)
160 309
161 update_siblings_masks(cpuid); 310 update_siblings_masks(cpuid);
162 311
312 update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK);
313
163 printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", 314 printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
164 cpuid, cpu_topology[cpuid].thread_id, 315 cpuid, cpu_topology[cpuid].thread_id,
165 cpu_topology[cpuid].core_id, 316 cpu_topology[cpuid].core_id,
@@ -187,4 +338,6 @@ void init_cpu_topology(void)
187 set_power_scale(cpu, SCHED_POWER_SCALE); 338 set_power_scale(cpu, SCHED_POWER_SCALE);
188 } 339 }
189 smp_wmb(); 340 smp_wmb();
341
342 parse_dt_topology();
190} 343}