| author | Daniel Thompson <daniel.thompson@linaro.org> | 2015-03-26 15:23:23 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2015-03-27 03:33:57 -0400 |
| commit | cf7c9c170787d6870af54684822f58acc00a966c (patch) | |
| tree | 7fc5b9fe286c4f7fb5a8b54031b4dbffd9f228ac | |
| parent | 8710e914027e4f64058ebbf0501cc6db3cc8454f (diff) | |
timers, sched/clock: Optimize cache line usage
Currently sched_clock(), a very hot code path, is not optimized
to minimise its cache profile. In particular:
1. cd is not ____cacheline_aligned,
2. struct clock_data does not distinguish between hotpath and
coldpath data, reducing locality of reference in the hotpath,
3. Some hotpath data is missing from struct clock_data and is marked
__read_mostly (which more or less guarantees it will not share a
cache line with cd).
This patch corrects these problems by extracting all hotpath
data into a separate structure and using ____cacheline_aligned
to ensure the hotpath uses a single (64 byte) cache line.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1427397806-20889-3-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | kernel/time/sched_clock.c | 112 |
1 file changed, 77 insertions, 35 deletions
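
To make the layout idea concrete before diving into the patch, here is a minimal userspace sketch. It is illustrative only, not the kernel code: the plain-C field types, the `seq`/`wrap_kt` stand-ins, and the `aligned(64)` attribute standing in for `____cacheline_aligned` are all assumptions. The point it demonstrates is the one from the commit message: every field the `sched_clock()` reader touches lives in one small structure, and the enclosing structure is aligned so that the sequence counter plus that hot structure share a single 64-byte cache line.

```c
/* Illustrative userspace model of the patch's layout; not kernel code. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hot data: everything the sched_clock() reader touches (<= 48 bytes on LP64). */
struct clock_read_data {
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
	uint64_t sched_clock_mask;
	uint64_t (*read_sched_clock)(void);
	uint32_t mult;
	uint32_t shift;
	bool suspended;
};

/*
 * Cold registration-time data follows the hot data; the whole object is
 * aligned so that seq + read_data land in the first 64-byte cache line,
 * mimicking the kernel's ____cacheline_aligned.
 */
struct clock_data {
	unsigned int seq;		/* stand-in for seqcount_t */
	struct clock_read_data read_data;
	int64_t wrap_kt;		/* stand-in for ktime_t */
	unsigned long rate;
} __attribute__((aligned(64)));

int main(void)
{
	printf("read_data = %zu bytes, seq + read_data end at byte %zu\n",
	       sizeof(struct clock_read_data),
	       offsetof(struct clock_data, read_data) +
	       sizeof(struct clock_read_data));
	return 0;
}
```

On a typical LP64 build this prints 48 bytes for the hot structure, ending at byte 56 of `clock_data`, which is why the kernel-doc in the patch can claim the reader's working set fits comfortably in one 64-byte line.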
```diff
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 1751e956add9..872e0685d1fb 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -18,28 +18,59 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-	ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock
+ *
+ * @epoch_ns:		sched_clock value at last update
+ * @epoch_cyc:		Clock cycle value at last update
+ * @sched_clock_mask:	Bitmask for two's complement subtraction of non 64bit
+ *			clocks
+ * @read_sched_clock:	Current clock source (or dummy source when suspended)
+ * @mult:		Multipler for scaled math conversion
+ * @shift:		Shift value for scaled math conversion
+ * @suspended:		Flag to indicate if the clock is suspended (stopped)
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=48 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
-	seqcount_t seq;
-	unsigned long rate;
+	u64 sched_clock_mask;
+	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
 	bool suspended;
 };
 
+/**
+ * struct clock_data - all data needed for sched_clock (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates.
+ * @read_data:		Data required to read from sched_clock.
+ * @wrap_kt:		Duration for which clock can run before wrapping
+ * @rate:		Tick rate of the registered clock
+ * @actual_read_sched_clock: Registered clock read function
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular seq and read_data (combined) should fit
+ * into a single 64 byte cache line.
+ */
+struct clock_data {
+	seqcount_t seq;
+	struct clock_read_data read_data;
+	ktime_t wrap_kt;
+	unsigned long rate;
+};
+
 static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-	.mult = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
 	/*
@@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+	.read_data = { .mult = NSEC_PER_SEC / HZ,
+		       .read_sched_clock = jiffy_sched_clock_read, },
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void)
 {
 	u64 cyc, res;
 	unsigned long seq;
+	struct clock_read_data *rd = &cd.read_data;
 
 	do {
 		seq = raw_read_seqcount_begin(&cd.seq);
 
-		res = cd.epoch_ns;
-		if (!cd.suspended) {
-			cyc = read_sched_clock();
-			cyc = (cyc - cd.epoch_cyc) & sched_clock_mask;
-			res += cyc_to_ns(cyc, cd.mult, cd.shift);
+		res = rd->epoch_ns;
+		if (!rd->suspended) {
+			cyc = rd->read_sched_clock();
+			cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask;
+			res += cyc_to_ns(cyc, rd->mult, rd->shift);
 		}
 	} while (read_seqcount_retry(&cd.seq, seq));
 
@@ -83,16 +118,17 @@ static void notrace update_sched_clock(void)
 	unsigned long flags;
 	u64 cyc;
 	u64 ns;
+	struct clock_read_data *rd = &cd.read_data;
 
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = rd->read_sched_clock();
+	ns = rd->epoch_ns +
+		cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
+			  rd->mult, rd->shift);
 
 	raw_local_irq_save(flags);
 	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
+	rd->epoch_ns = ns;
+	rd->epoch_cyc = cyc;
 	raw_write_seqcount_end(&cd.seq);
 	raw_local_irq_restore(flags);
 }
@@ -111,6 +147,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	u32 new_mult, new_shift;
 	unsigned long r;
 	char r_unit;
+	struct clock_read_data *rd = &cd.read_data;
 
 	if (cd.rate > rate)
 		return;
@@ -129,17 +166,18 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 
 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = rd->read_sched_clock();
+	ns = rd->epoch_ns +
+		cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
+			  rd->mult, rd->shift);
 
 	raw_write_seqcount_begin(&cd.seq);
-	read_sched_clock = read;
-	sched_clock_mask = new_mask;
-	cd.mult = new_mult;
-	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
+	rd->read_sched_clock = read;
+	rd->sched_clock_mask = new_mask;
+	rd->mult = new_mult;
+	rd->shift = new_shift;
+	rd->epoch_cyc = new_epoch;
+	rd->epoch_ns = ns;
 	raw_write_seqcount_end(&cd.seq);
 
 	r = rate;
@@ -171,7 +209,7 @@ void __init sched_clock_postinit(void)
 	 * If no sched_clock function has been provided at that point,
 	 * make it the final one one.
 	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
+	if (cd.read_data.read_sched_clock == jiffy_sched_clock_read)
 		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
@@ -187,17 +225,21 @@ void __init sched_clock_postinit(void)
 
 static int sched_clock_suspend(void)
 {
+	struct clock_read_data *rd = &cd.read_data;
+
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	cd.suspended = true;
+	rd->suspended = true;
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	cd.epoch_cyc = read_sched_clock();
+	struct clock_read_data *rd = &cd.read_data;
+
+	rd->epoch_cyc = rd->read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-	cd.suspended = false;
+	rd->suspended = false;
 }
 
 static struct syscore_ops sched_clock_ops = {
```
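
The "single 64 byte cache line" property claimed in the kernel-doc comments can also be expressed as a compile-time check. The patch itself adds no such assertion; the snippet below is a hypothetical addition written against the userspace sketch shown before the diff, using standard C11 `_Static_assert` and `offsetof`.

```c
#include <stddef.h>

/*
 * Hypothetical check, not part of the patch: the sequence counter and the
 * hot read data (as modelled in the sketch above) must both end within the
 * first 64-byte cache line of struct clock_data.
 */
_Static_assert(offsetof(struct clock_data, read_data) +
	       sizeof(struct clock_read_data) <= 64,
	       "hot sched_clock() data no longer fits in one cache line");
```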