aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/clocksource/exynos_mct.c
diff options
context:
space:
mode:
author: Doug Anderson <dianders@chromium.org> 2014-07-04 17:43:26 -0400
committer: Daniel Lezcano <daniel.lezcano@linaro.org> 2014-07-23 06:02:41 -0400
commit: 3252a646aa2cf706b2a26433a8bd9cb2e5dce410 (patch)
tree: 8e8520e5e646b3ac58be30f23885a49748431bba /drivers/clocksource/exynos_mct.c
parent: fdb06f66d53e3c9ba7eeab3c0629c450aee76937 (diff)
clocksource: exynos_mct: Only use 32-bits where possible
The MCT has a nice 64-bit counter. That means that we _can_ register as a 64-bit clocksource and sched_clock. ...but that doesn't mean we should.

The 64-bit counter is read by reading two 32-bit registers. That means reading needs to be something like:
- Read upper half
- Read lower half
- Read upper half and confirm that it hasn't changed.

That wouldn't be terrible, but:
- The MCT isn't very fast to access (hundreds of nanoseconds).
- The clocksource is queried _all the time_.

In total system profiles of real workloads on ChromeOS, we've seen exynos_frc_read() taking 2% or more of CPU time even after optimizing the 3 reads above to 2 (see below).

The MCT is clocked at ~24MHz on all known systems. That means that the 32-bit half of the counter rolls over every ~178 seconds. This inspired an optimization in ChromeOS to cache the upper half between calls, moving 3 reads to 2. ...but we can do better! Having a 32-bit timer that flips every 178 seconds is more than sufficient for Linux. Let's just use the lower half of the MCT.

Times on 5420 to do 1000000 gettimeofday() calls from userspace:
* Original code: 1323852 us
* ChromeOS cache upper half: 1173084 us
* ChromeOS + ldmia to optimize: 1045674 us
* Use lower 32-bit only (this code): 1014429 us

As you can see, the time used doesn't increase linearly with the number of reads and we can make 64-bit work almost as fast as 32-bit with a bit of assembly code. But since there's no real gain for 64-bit, let's go with the simplest and fastest implementation.

Note: with this change roughly half the time for gettimeofday() is spent in exynos_frc_read(). The rest is timer / system call overhead.

Also note: this patch disables the use of the MCT on ARM64 systems until we've sorted out how to make "cycles_t" always 32-bit. Really ARM64 systems should be using arch timers anyway.
Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Diffstat (limited to 'drivers/clocksource/exynos_mct.c')
-rw-r--r-- drivers/clocksource/exynos_mct.c | 39
1 files changed, 32 insertions, 7 deletions
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index 2df03e238c1b..9403061a2acc 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -162,7 +162,17 @@ static void exynos4_mct_frc_start(void)
162 exynos4_mct_write(reg, EXYNOS4_MCT_G_TCON); 162 exynos4_mct_write(reg, EXYNOS4_MCT_G_TCON);
163} 163}
164 164
165static cycle_t notrace _exynos4_frc_read(void) 165/**
166 * exynos4_read_count_64 - Read all 64-bits of the global counter
167 *
168 * This will read all 64-bits of the global counter taking care to make sure
169 * that the upper and lower half match. Note that reading the MCT can be quite
170 * slow (hundreds of nanoseconds) so you should use the 32-bit (lower half
171 * only) version when possible.
172 *
173 * Returns the number of cycles in the global counter.
174 */
175static u64 exynos4_read_count_64(void)
166{ 176{
167 unsigned int lo, hi; 177 unsigned int lo, hi;
168 u32 hi2 = readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_U); 178 u32 hi2 = readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_U);
@@ -176,9 +186,22 @@ static cycle_t notrace _exynos4_frc_read(void)
176 return ((cycle_t)hi << 32) | lo; 186 return ((cycle_t)hi << 32) | lo;
177} 187}
178 188
189/**
190 * exynos4_read_count_32 - Read the lower 32-bits of the global counter
191 *
192 * This will read just the lower 32-bits of the global counter. This is marked
193 * as notrace so it can be used by the scheduler clock.
194 *
195 * Returns the number of cycles in the global counter (lower 32 bits).
196 */
197static u32 notrace exynos4_read_count_32(void)
198{
199 return readl_relaxed(reg_base + EXYNOS4_MCT_G_CNT_L);
200}
201
179static cycle_t exynos4_frc_read(struct clocksource *cs) 202static cycle_t exynos4_frc_read(struct clocksource *cs)
180{ 203{
181 return _exynos4_frc_read(); 204 return exynos4_read_count_32();
182} 205}
183 206
184static void exynos4_frc_resume(struct clocksource *cs) 207static void exynos4_frc_resume(struct clocksource *cs)
@@ -190,21 +213,23 @@ struct clocksource mct_frc = {
190 .name = "mct-frc", 213 .name = "mct-frc",
191 .rating = 400, 214 .rating = 400,
192 .read = exynos4_frc_read, 215 .read = exynos4_frc_read,
193 .mask = CLOCKSOURCE_MASK(64), 216 .mask = CLOCKSOURCE_MASK(32),
194 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 217 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
195 .resume = exynos4_frc_resume, 218 .resume = exynos4_frc_resume,
196}; 219};
197 220
198static u64 notrace exynos4_read_sched_clock(void) 221static u64 notrace exynos4_read_sched_clock(void)
199{ 222{
200 return _exynos4_frc_read(); 223 return exynos4_read_count_32();
201} 224}
202 225
203static struct delay_timer exynos4_delay_timer; 226static struct delay_timer exynos4_delay_timer;
204 227
205static cycles_t exynos4_read_current_timer(void) 228static cycles_t exynos4_read_current_timer(void)
206{ 229{
207 return _exynos4_frc_read(); 230 BUILD_BUG_ON_MSG(sizeof(cycles_t) != sizeof(u32),
231 "cycles_t needs to move to 32-bit for ARM64 usage");
232 return exynos4_read_count_32();
208} 233}
209 234
210static void __init exynos4_clocksource_init(void) 235static void __init exynos4_clocksource_init(void)
@@ -218,7 +243,7 @@ static void __init exynos4_clocksource_init(void)
218 if (clocksource_register_hz(&mct_frc, clk_rate)) 243 if (clocksource_register_hz(&mct_frc, clk_rate))
219 panic("%s: can't register clocksource\n", mct_frc.name); 244 panic("%s: can't register clocksource\n", mct_frc.name);
220 245
221 sched_clock_register(exynos4_read_sched_clock, 64, clk_rate); 246 sched_clock_register(exynos4_read_sched_clock, 32, clk_rate);
222} 247}
223 248
224static void exynos4_mct_comp0_stop(void) 249static void exynos4_mct_comp0_stop(void)
@@ -245,7 +270,7 @@ static void exynos4_mct_comp0_start(enum clock_event_mode mode,
245 exynos4_mct_write(cycles, EXYNOS4_MCT_G_COMP0_ADD_INCR); 270 exynos4_mct_write(cycles, EXYNOS4_MCT_G_COMP0_ADD_INCR);
246 } 271 }
247 272
248 comp_cycle = exynos4_frc_read(&mct_frc) + cycles; 273 comp_cycle = exynos4_read_count_64() + cycles;
249 exynos4_mct_write((u32)comp_cycle, EXYNOS4_MCT_G_COMP0_L); 274 exynos4_mct_write((u32)comp_cycle, EXYNOS4_MCT_G_COMP0_L);
250 exynos4_mct_write((u32)(comp_cycle >> 32), EXYNOS4_MCT_G_COMP0_U); 275 exynos4_mct_write((u32)(comp_cycle >> 32), EXYNOS4_MCT_G_COMP0_U);
251 276