diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/kernel/topology.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index eb5fc8132c02..198b08456e90 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c | |||
@@ -17,7 +17,9 @@ | |||
17 | #include <linux/percpu.h> | 17 | #include <linux/percpu.h> |
18 | #include <linux/node.h> | 18 | #include <linux/node.h> |
19 | #include <linux/nodemask.h> | 19 | #include <linux/nodemask.h> |
20 | #include <linux/of.h> | ||
20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/slab.h> | ||
21 | 23 | ||
22 | #include <asm/cputype.h> | 24 | #include <asm/cputype.h> |
23 | #include <asm/topology.h> | 25 | #include <asm/topology.h> |
@@ -49,6 +51,152 @@ static void set_power_scale(unsigned int cpu, unsigned long power) | |||
49 | per_cpu(cpu_scale, cpu) = power; | 51 | per_cpu(cpu_scale, cpu) = power; |
50 | } | 52 | } |
51 | 53 | ||
54 | #ifdef CONFIG_OF | ||
/*
 * One entry of the processor efficiency table: the DT "compatible" string
 * that identifies a processor type and the relative efficiency assigned
 * to it.
 */
struct cpu_efficiency {
	const char *compatible;
	unsigned long efficiency;
};

/*
 * Relative efficiency of each known processor type.
 *
 * The efficiency value must fit in 20 bits, and the final cpu_scale value
 * must be in the range
 *	0 < cpu_scale < 3 * SCHED_POWER_SCALE / 2
 * in order to return at most 1 when DIV_ROUND_CLOSEST is used to compute
 * the capacity of a CPU.
 *
 * Processors that are not listed in the table use the default
 * SCHED_POWER_SCALE value for cpu_scale. The table is terminated by an
 * entry whose compatible string is NULL.
 */
struct cpu_efficiency table_efficiency[] = {
	{ .compatible = "arm,cortex-a15", .efficiency = 3891 },
	{ .compatible = "arm,cortex-a7",  .efficiency = 2048 },
	{ .compatible = NULL },		/* end of table */
};
75 | |||
/*
 * Raw capacity computed for one CPU, keyed by the hardware id read from
 * the "reg" property of its DT node.
 */
struct cpu_capacity {
	unsigned long hwid;
	unsigned long capacity;
};

/*
 * Table of per-CPU capacities, allocated and filled by parse_dt_topology().
 * An entry whose hwid is (unsigned long)(-1) marks the end of the valid
 * entries.
 */
struct cpu_capacity *cpu_capacity;

/*
 * Divisor applied to a raw capacity by update_cpu_power() to obtain the
 * cpu_scale value. Stays at 1 unless parse_dt_topology() computes another
 * value.
 */
unsigned long middle_capacity = 1;
84 | |||
85 | /* | ||
86 | * Iterate all CPUs' descriptor in DT and compute the efficiency | ||
87 | * (as per table_efficiency). Also calculate a middle efficiency | ||
88 | * as close as possible to (max{eff_i} - min{eff_i}) / 2 | ||
89 | * This is later used to scale the cpu_power field such that an | ||
90 | * 'average' CPU is of middle power. Also see the comments near | ||
91 | * table_efficiency[] and update_cpu_power(). | ||
92 | */ | ||
93 | static void __init parse_dt_topology(void) | ||
94 | { | ||
95 | struct cpu_efficiency *cpu_eff; | ||
96 | struct device_node *cn = NULL; | ||
97 | unsigned long min_capacity = (unsigned long)(-1); | ||
98 | unsigned long max_capacity = 0; | ||
99 | unsigned long capacity = 0; | ||
100 | int alloc_size, cpu = 0; | ||
101 | |||
102 | alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity); | ||
103 | cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT); | ||
104 | |||
105 | while ((cn = of_find_node_by_type(cn, "cpu"))) { | ||
106 | const u32 *rate, *reg; | ||
107 | int len; | ||
108 | |||
109 | if (cpu >= num_possible_cpus()) | ||
110 | break; | ||
111 | |||
112 | for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) | ||
113 | if (of_device_is_compatible(cn, cpu_eff->compatible)) | ||
114 | break; | ||
115 | |||
116 | if (cpu_eff->compatible == NULL) | ||
117 | continue; | ||
118 | |||
119 | rate = of_get_property(cn, "clock-frequency", &len); | ||
120 | if (!rate || len != 4) { | ||
121 | pr_err("%s missing clock-frequency property\n", | ||
122 | cn->full_name); | ||
123 | continue; | ||
124 | } | ||
125 | |||
126 | reg = of_get_property(cn, "reg", &len); | ||
127 | if (!reg || len != 4) { | ||
128 | pr_err("%s missing reg property\n", cn->full_name); | ||
129 | continue; | ||
130 | } | ||
131 | |||
132 | capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; | ||
133 | |||
134 | /* Save min capacity of the system */ | ||
135 | if (capacity < min_capacity) | ||
136 | min_capacity = capacity; | ||
137 | |||
138 | /* Save max capacity of the system */ | ||
139 | if (capacity > max_capacity) | ||
140 | max_capacity = capacity; | ||
141 | |||
142 | cpu_capacity[cpu].capacity = capacity; | ||
143 | cpu_capacity[cpu++].hwid = be32_to_cpup(reg); | ||
144 | } | ||
145 | |||
146 | if (cpu < num_possible_cpus()) | ||
147 | cpu_capacity[cpu].hwid = (unsigned long)(-1); | ||
148 | |||
149 | /* If min and max capacities are equals, we bypass the update of the | ||
150 | * cpu_scale because all CPUs have the same capacity. Otherwise, we | ||
151 | * compute a middle_capacity factor that will ensure that the capacity | ||
152 | * of an 'average' CPU of the system will be as close as possible to | ||
153 | * SCHED_POWER_SCALE, which is the default value, but with the | ||
154 | * constraint explained near table_efficiency[]. | ||
155 | */ | ||
156 | if (min_capacity == max_capacity) | ||
157 | cpu_capacity[0].hwid = (unsigned long)(-1); | ||
158 | else if (4*max_capacity < (3*(max_capacity + min_capacity))) | ||
159 | middle_capacity = (min_capacity + max_capacity) | ||
160 | >> (SCHED_POWER_SHIFT+1); | ||
161 | else | ||
162 | middle_capacity = ((max_capacity / 3) | ||
163 | >> (SCHED_POWER_SHIFT-1)) + 1; | ||
164 | |||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Look for a customed capacity of a CPU in the cpu_capacity table during the | ||
169 | * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the | ||
170 | * function returns directly for SMP system. | ||
171 | */ | ||
172 | void update_cpu_power(unsigned int cpu, unsigned long hwid) | ||
173 | { | ||
174 | unsigned int idx = 0; | ||
175 | |||
176 | /* look for the cpu's hwid in the cpu capacity table */ | ||
177 | for (idx = 0; idx < num_possible_cpus(); idx++) { | ||
178 | if (cpu_capacity[idx].hwid == hwid) | ||
179 | break; | ||
180 | |||
181 | if (cpu_capacity[idx].hwid == -1) | ||
182 | return; | ||
183 | } | ||
184 | |||
185 | if (idx == num_possible_cpus()) | ||
186 | return; | ||
187 | |||
188 | set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity); | ||
189 | |||
190 | printk(KERN_INFO "CPU%u: update cpu_power %lu\n", | ||
191 | cpu, arch_scale_freq_power(NULL, cpu)); | ||
192 | } | ||
193 | |||
194 | #else | ||
/*
 * Without CONFIG_OF there is no device tree to parse: both hooks are
 * no-ops. The prototypes mirror the CONFIG_OF versions so that callers see
 * the same parameter types in both configurations.
 */
static inline void parse_dt_topology(void) {}
static inline void update_cpu_power(unsigned int cpu, unsigned long hwid) {}
197 | #endif | ||
198 | |||
199 | |||
52 | /* | 200 | /* |
53 | * cpu topology management | 201 | * cpu topology management |
54 | */ | 202 | */ |
@@ -62,6 +210,7 @@ static void set_power_scale(unsigned int cpu, unsigned long power) | |||
62 | * These masks reflect the current use of the affinity levels. | 210 | * These masks reflect the current use of the affinity levels. |
63 | * The affinity level can be up to 16 bits according to ARM ARM | 211 | * The affinity level can be up to 16 bits according to ARM ARM |
64 | */ | 212 | */ |
/* Low 24 bits of the MPIDR value, used as the hardware id of a CPU. */
#define MPIDR_HWID_BITMASK 0xFFFFFF
65 | 214 | ||
66 | #define MPIDR_LEVEL0_MASK 0x3 | 215 | #define MPIDR_LEVEL0_MASK 0x3 |
67 | #define MPIDR_LEVEL0_SHIFT 0 | 216 | #define MPIDR_LEVEL0_SHIFT 0 |
@@ -160,6 +309,8 @@ void store_cpu_topology(unsigned int cpuid) | |||
160 | 309 | ||
161 | update_siblings_masks(cpuid); | 310 | update_siblings_masks(cpuid); |
162 | 311 | ||
312 | update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK); | ||
313 | |||
163 | printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", | 314 | printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", |
164 | cpuid, cpu_topology[cpuid].thread_id, | 315 | cpuid, cpu_topology[cpuid].thread_id, |
165 | cpu_topology[cpuid].core_id, | 316 | cpu_topology[cpuid].core_id, |
@@ -187,4 +338,6 @@ void init_cpu_topology(void) | |||
187 | set_power_scale(cpu, SCHED_POWER_SCALE); | 338 | set_power_scale(cpu, SCHED_POWER_SCALE); |
188 | } | 339 | } |
189 | smp_wmb(); | 340 | smp_wmb(); |
341 | |||
342 | parse_dt_topology(); | ||
190 | } | 343 | } |