author	Daniel Thompson <daniel.thompson@linaro.org>	2015-03-26 15:23:23 -0400
committer	Ingo Molnar <mingo@kernel.org>	2015-03-27 03:33:57 -0400
commit	cf7c9c170787d6870af54684822f58acc00a966c (patch)
tree	7fc5b9fe286c4f7fb5a8b54031b4dbffd9f228ac
parent	8710e914027e4f64058ebbf0501cc6db3cc8454f (diff)
timers, sched/clock: Optimize cache line usage
Currently sched_clock(), a very hot code path, is not optimized to
minimise its cache profile. In particular:

  1. cd is not ____cacheline_aligned,

  2. struct clock_data does not distinguish between hotpath and
     coldpath data, reducing locality of reference in the hotpath,

  3. Some hotpath data is missing from struct clock_data and is marked
     __read_mostly (which more or less guarantees it will not share a
     cache line with cd).

This patch corrects these problems by extracting all hotpath data into a
separate structure and using ____cacheline_aligned to ensure the hotpath
uses a single (64 byte) cache line.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1427397806-20889-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
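[Editor's note, not part of the patch] As a rough cross-check of the size claim in the new kerneldoc: on a typical 64-bit build the hot-path fields add up to 41 bytes of payload, padded to 48, which together with a 4-byte seqcount stays inside one 64-byte cache line. The standalone sketch below mirrors that layout with standard C types; the struct name and the userspace stand-ins for u64/u32 are assumptions for illustration only.

/* Illustration only: a userspace mirror of the hot-path layout. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct clock_read_data_sketch {             /* hypothetical stand-in, not kernel code */
	uint64_t epoch_ns;                  /*  8 bytes */
	uint64_t epoch_cyc;                 /*  8 bytes */
	uint64_t sched_clock_mask;          /*  8 bytes */
	uint64_t (*read_sched_clock)(void); /*  8 bytes on a 64-bit target */
	uint32_t mult;                      /*  4 bytes */
	uint32_t shift;                     /*  4 bytes */
	bool suspended;                     /*  1 byte + tail padding */
};

/* 8+8+8+8+4+4+1 = 41 bytes of payload; alignment pads the struct to 48. */
static_assert(sizeof(struct clock_read_data_sketch) <= 48,
	      "hot data should fit in 48 bytes");

int main(void)
{
	printf("sizeof(read data) = %zu\n",
	       sizeof(struct clock_read_data_sketch));
	/* A seqcount is essentially an unsigned int, so seq + read data <= 64. */
	printf("with 4-byte seqcount = %zu\n",
	       sizeof(unsigned int) + sizeof(struct clock_read_data_sketch));
	return 0;
}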
-rw-r--r--	kernel/time/sched_clock.c	112
1 files changed, 77 insertions, 35 deletions
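[Editor's note, not part of the patch] The layout matters because sched_clock() reads this data in a lockless seqcount retry loop, so every reader touches seq plus the whole of read_data on each call. A minimal userspace-flavoured sketch of that read pattern follows; it only approximates what raw_read_seqcount_begin()/read_seqcount_retry() do in the kernel (barrier and lockdep details omitted), and all names in it are illustrative assumptions.

/* Illustration only: simplified seqcount-style read loop, not the kernel API. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct snapshot {
	_Atomic unsigned int seq;   /* even = stable, odd = writer active */
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
};

static uint64_t read_epoch_ns(struct snapshot *s)
{
	unsigned int start;
	uint64_t ns;

	do {
		/* Spin until the counter is even (no writer in progress). */
		do {
			start = atomic_load_explicit(&s->seq, memory_order_acquire);
		} while (start & 1);

		ns = s->epoch_ns;   /* read the protected fields */

		atomic_thread_fence(memory_order_acquire);
		/* Retry if a writer bumped the counter while we were reading. */
	} while (atomic_load_explicit(&s->seq, memory_order_relaxed) != start);

	return ns;
}

int main(void)
{
	struct snapshot s = { .seq = 0, .epoch_ns = 123456789ULL, .epoch_cyc = 42 };

	printf("epoch_ns = %llu\n", (unsigned long long)read_epoch_ns(&s));
	return 0;
}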
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 1751e956add9..872e0685d1fb 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -18,28 +18,59 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-	ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock
+ *
+ * @epoch_ns:		sched_clock value at last update
+ * @epoch_cyc:		Clock cycle value at last update
+ * @sched_clock_mask:	Bitmask for two's complement subtraction of non 64bit
+ *			clocks
+ * @read_sched_clock:	Current clock source (or dummy source when suspended)
+ * @mult:		Multipler for scaled math conversion
+ * @shift:		Shift value for scaled math conversion
+ * @suspended:		Flag to indicate if the clock is suspended (stopped)
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=48 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
-	seqcount_t seq;
-	unsigned long rate;
+	u64 sched_clock_mask;
+	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
 	bool suspended;
 };
 
+/**
+ * struct clock_data - all data needed for sched_clock (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates.
+ * @read_data:		Data required to read from sched_clock.
+ * @wrap_kt:		Duration for which clock can run before wrapping
+ * @rate:		Tick rate of the registered clock
+ * @actual_read_sched_clock: Registered clock read function
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular seq and read_data (combined) should fit
+ * into a single 64 byte cache line.
+ */
+struct clock_data {
+	seqcount_t seq;
+	struct clock_read_data read_data;
+	ktime_t wrap_kt;
+	unsigned long rate;
+};
+
 static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-	.mult	= NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
 	/*
@@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+	.read_data = { .mult = NSEC_PER_SEC / HZ,
+		       .read_sched_clock = jiffy_sched_clock_read, },
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void)
 {
 	u64 cyc, res;
 	unsigned long seq;
+	struct clock_read_data *rd = &cd.read_data;
 
 	do {
 		seq = raw_read_seqcount_begin(&cd.seq);
 
-		res = cd.epoch_ns;
-		if (!cd.suspended) {
-			cyc = read_sched_clock();
-			cyc = (cyc - cd.epoch_cyc) & sched_clock_mask;
-			res += cyc_to_ns(cyc, cd.mult, cd.shift);
+		res = rd->epoch_ns;
+		if (!rd->suspended) {
+			cyc = rd->read_sched_clock();
+			cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask;
+			res += cyc_to_ns(cyc, rd->mult, rd->shift);
 		}
 	} while (read_seqcount_retry(&cd.seq, seq));
 
@@ -83,16 +118,17 @@ static void notrace update_sched_clock(void)
 	unsigned long flags;
 	u64 cyc;
 	u64 ns;
+	struct clock_read_data *rd = &cd.read_data;
 
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = rd->read_sched_clock();
+	ns = rd->epoch_ns +
+	     cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
+		       rd->mult, rd->shift);
 
 	raw_local_irq_save(flags);
 	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
+	rd->epoch_ns = ns;
+	rd->epoch_cyc = cyc;
 	raw_write_seqcount_end(&cd.seq);
 	raw_local_irq_restore(flags);
 }
@@ -111,6 +147,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	u32 new_mult, new_shift;
 	unsigned long r;
 	char r_unit;
+	struct clock_read_data *rd = &cd.read_data;
 
 	if (cd.rate > rate)
 		return;
@@ -129,17 +166,18 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 
 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = rd->read_sched_clock();
+	ns = rd->epoch_ns +
+	     cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
+		       rd->mult, rd->shift);
 
 	raw_write_seqcount_begin(&cd.seq);
-	read_sched_clock = read;
-	sched_clock_mask = new_mask;
-	cd.mult = new_mult;
-	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
+	rd->read_sched_clock = read;
+	rd->sched_clock_mask = new_mask;
+	rd->mult = new_mult;
+	rd->shift = new_shift;
+	rd->epoch_cyc = new_epoch;
+	rd->epoch_ns = ns;
 	raw_write_seqcount_end(&cd.seq);
 
 	r = rate;
@@ -171,7 +209,7 @@ void __init sched_clock_postinit(void)
 	 * If no sched_clock function has been provided at that point,
 	 * make it the final one one.
 	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
+	if (cd.read_data.read_sched_clock == jiffy_sched_clock_read)
 		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
@@ -187,17 +225,21 @@ void __init sched_clock_postinit(void)
 
 static int sched_clock_suspend(void)
 {
+	struct clock_read_data *rd = &cd.read_data;
+
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	cd.suspended = true;
+	rd->suspended = true;
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	cd.epoch_cyc = read_sched_clock();
+	struct clock_read_data *rd = &cd.read_data;
+
+	rd->epoch_cyc = rd->read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-	cd.suspended = false;
+	rd->suspended = false;
 }
 
 static struct syscore_ops sched_clock_ops = {