| author | John Stultz <john.stultz@linaro.org> | 2013-09-16 21:52:52 -0400 |
|---|---|---|
| committer | John Stultz <john.stultz@linaro.org> | 2013-09-16 21:52:52 -0400 |
| commit | 19c3205ceaffc8f860acf61537fa211087e4b8fc | |
| tree | 36a232e278c4d7f8d893ba211aec7c8d07743e09 /kernel | |
| parent | 272b98c6455f00884f0350f775c5342358ebb73f | |
| parent | e7e3ff1bfe9c42ee31172e9afdc0383a9e595e29 | |
Merge branch 'fortglx/3.12/sched-clock64-base' into fortglx/3.13/time
Merge in 64bit sched_clock support that missed 3.12.
Conflicts:
kernel/time/sched_clock.c
Signed-off-by: John Stultz <john.stultz@linaro.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/time/clocksource.c | 45 |
|---|---|---|
| -rw-r--r-- | kernel/time/sched_clock.c | 111 |

2 files changed, 91 insertions, 65 deletions
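The most subtle part of the sched_clock.c changes below is the replacement of the hand-rolled epoch_cyc/epoch_cyc_copy publication protocol with a seqcount, so readers simply retry instead of comparing two copies of the cycle count. As a rough illustration only, here is a minimal userspace analogue of that reader/writer pattern built on C11 atomics; the kernel code uses seqcount_t with write_seqcount_begin()/read_seqcount_retry() and disables interrupts around the writer, none of which is reproduced here.

```c
/* Userspace sketch of the seqcount-style publication used in sched_clock.c.
 * Illustrative only: the names and the single-writer assumption are mine,
 * and the kernel's seqcount_t API is not reproduced here. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;	/* even: stable, odd: update in flight */
static _Atomic uint64_t epoch_ns;
static _Atomic uint64_t epoch_cyc;

static void write_epoch(uint64_t ns, uint64_t cyc)	/* single writer */
{
	unsigned int s = atomic_load_explicit(&seq, memory_order_relaxed);

	atomic_store_explicit(&seq, s + 1, memory_order_relaxed);	/* odd */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&epoch_ns, ns, memory_order_relaxed);
	atomic_store_explicit(&epoch_cyc, cyc, memory_order_relaxed);
	atomic_store_explicit(&seq, s + 2, memory_order_release);	/* even */
}

static void read_epoch(uint64_t *ns, uint64_t *cyc)
{
	unsigned int s;

	do {	/* retry while a write is in flight or completed meanwhile */
		s = atomic_load_explicit(&seq, memory_order_acquire);
		*ns = atomic_load_explicit(&epoch_ns, memory_order_relaxed);
		*cyc = atomic_load_explicit(&epoch_cyc, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);
	} while ((s & 1) || atomic_load_explicit(&seq, memory_order_relaxed) != s);
}

int main(void)
{
	uint64_t ns, cyc;

	write_epoch(1000000, 1000);
	read_epoch(&ns, &cyc);
	printf("epoch_ns=%llu epoch_cyc=%llu\n",
	       (unsigned long long)ns, (unsigned long long)cyc);
	return 0;
}
```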
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 50a8736757f3..637a14af6c21 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -537,40 +537,55 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs: Pointer to clocksource
- *
+ * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
+ * @mult:	cycle to nanosecond multiplier
+ * @shift:	cycle to nanosecond divisor (power of two)
+ * @maxadj:	maximum adjustment value to mult (~11%)
+ * @mask:	bitmask for two's complement subtraction of non 64 bit counters
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
+	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
 	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
-	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
-	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
-	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
+	 * max_cycles < (2^63)/(mult + maxadj)
+	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
+	 * max_cycles < 2^(63 - log2(mult + maxadj))
+	 * max_cycles < 1 << (63 - log2(mult + maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
-	 * determined by the minimum of max_cycles and cs->mask.
+	 * determined by the minimum of max_cycles and mask.
 	 * Note: Here we subtract the maxadj to make sure we don't sleep for
 	 * too long if there's a large negative adjustment.
 	 */
-	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
-					cs->shift);
+	max_cycles = min(max_cycles, mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
+
+	return max_nsecs;
+}
+
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs: Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs;
 
+	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
+					  cs->mask);
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
 	 * limit the time the clocksource can be deferred by 12.5%. Please
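To make the effect of the new helper concrete, the following standalone program re-derives the clocks_calc_max_nsecs() formula for one made-up clocksource (a 1 GHz, 32-bit counter with the usual ~11% adjustment margin). It mirrors the arithmetic above but is not kernel code; the mult/shift/mask values are invented for illustration.

```c
/* Standalone re-derivation of the clocks_calc_max_nsecs() formula.
 * The mult/shift/mask values describe a hypothetical 1 GHz, 32-bit counter
 * and are chosen for illustration only. */
#include <stdint.h>
#include <stdio.h>

static int ilog2_u32(uint32_t v)		/* floor(log2(v)), v > 0 */
{
	int r = -1;

	while (v) {
		v >>= 1;
		r++;
	}
	return r;
}

static uint64_t calc_max_nsecs(uint32_t mult, uint32_t shift,
			       uint32_t maxadj, uint64_t mask)
{
	uint64_t max_cycles = 1ULL << (63 - (ilog2_u32(mult + maxadj) + 1));

	if (max_cycles > mask)
		max_cycles = mask;
	return (max_cycles * (mult - maxadj)) >> shift;	/* cyc2ns */
}

int main(void)
{
	uint32_t mult = 4194304, shift = 22;		/* 1 cycle == 1 ns at 1 GHz */
	uint32_t maxadj = (uint64_t)mult * 11 / 100;	/* ~11%, as in the kernel */
	uint64_t mask = 0xffffffffULL;			/* 32-bit counter */

	/* prints roughly 3.8e9 ns: a 32-bit 1 GHz counter wraps in ~4.3 s,
	 * and the adjustment margin shaves that down further */
	printf("max deferment: %llu ns\n",
	       (unsigned long long)calc_max_nsecs(mult, shift, maxadj, mask));
	return 0;
}
```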
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 0b479a6a22bb..f388baeaf2b6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -8,25 +8,28 @@
 #include <linux/clocksource.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/sched.h>
 #include <linux/syscore_ops.h>
-#include <linux/timer.h>
+#include <linux/hrtimer.h>
 #include <linux/sched_clock.h>
+#include <linux/seqlock.h>
+#include <linux/bitops.h>
 
 struct clock_data {
+	ktime_t wrap_kt;
 	u64 epoch_ns;
-	u32 epoch_cyc;
-	u32 epoch_cyc_copy;
+	u64 epoch_cyc;
+	seqcount_t seq;
 	unsigned long rate;
 	u32 mult;
 	u32 shift;
 	bool suspended;
 };
 
-static void sched_clock_poll(unsigned long wrap_ticks);
-static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
+static struct hrtimer sched_clock_timer;
 static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
@@ -35,14 +38,25 @@ static struct clock_data cd = {
 	.mult	= NSEC_PER_SEC / HZ,
 };
 
-static u32 __read_mostly sched_clock_mask = 0xffffffff;
+static u64 __read_mostly sched_clock_mask;
 
-static u32 notrace jiffy_sched_clock_read(void)
+static u64 notrace jiffy_sched_clock_read(void)
 {
-	return (u32)(jiffies - INITIAL_JIFFIES);
+	/*
+	 * We don't need to use get_jiffies_64 on 32-bit arches here
+	 * because we register with BITS_PER_LONG
+	 */
+	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static u32 __read_mostly (*read_sched_clock_32)(void);
+
+static u64 notrace read_sched_clock_32_wrapper(void)
+{
+	return read_sched_clock_32();
+}
+
+static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -52,25 +66,18 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 static unsigned long long notrace sched_clock_32(void)
 {
 	u64 epoch_ns;
-	u32 epoch_cyc;
-	u32 cyc;
+	u64 epoch_cyc;
+	u64 cyc;
+	unsigned long seq;
 
 	if (cd.suspended)
 		return cd.epoch_ns;
 
-	/*
-	 * Load the epoch_cyc and epoch_ns atomically. We do this by
-	 * ensuring that we always write epoch_cyc, epoch_ns and
-	 * epoch_cyc_copy in strict order, and read them in strict order.
-	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in
-	 * the middle of an update, and we should repeat the load.
-	 */
 	do {
+		seq = read_seqcount_begin(&cd.seq);
 		epoch_cyc = cd.epoch_cyc;
-		smp_rmb();
 		epoch_ns = cd.epoch_ns;
-		smp_rmb();
-	} while (epoch_cyc != cd.epoch_cyc_copy);
+	} while (read_seqcount_retry(&cd.seq, seq));
 
 	cyc = read_sched_clock();
 	cyc = (cyc - epoch_cyc) & sched_clock_mask;
@@ -83,49 +90,46 @@ static unsigned long long notrace sched_clock_32(void)
 static void notrace update_sched_clock(void)
 {
 	unsigned long flags;
-	u32 cyc;
+	u64 cyc;
 	u64 ns;
 
 	cyc = read_sched_clock();
 	ns = cd.epoch_ns +
 		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
 			  cd.mult, cd.shift);
-	/*
-	 * Write epoch_cyc and epoch_ns in a way that the update is
-	 * detectable in cyc_to_fixed_sched_clock().
-	 */
+
 	raw_local_irq_save(flags);
-	cd.epoch_cyc_copy = cyc;
-	smp_wmb();
+	write_seqcount_begin(&cd.seq);
 	cd.epoch_ns = ns;
-	smp_wmb();
 	cd.epoch_cyc = cyc;
+	write_seqcount_end(&cd.seq);
 	raw_local_irq_restore(flags);
 }
 
-static void sched_clock_poll(unsigned long wrap_ticks)
+static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
-	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
 	update_sched_clock();
+	hrtimer_forward_now(hrt, cd.wrap_kt);
+	return HRTIMER_RESTART;
 }
 
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+void __init sched_clock_register(u64 (*read)(void), int bits,
+				 unsigned long rate)
 {
-	unsigned long r, w;
+	unsigned long r;
 	u64 res, wrap;
 	char r_unit;
 
 	if (cd.rate > rate)
 		return;
 
-	BUG_ON(bits > 32);
 	WARN_ON(!irqs_disabled());
 	read_sched_clock = read;
-	sched_clock_mask = (1ULL << bits) - 1;
+	sched_clock_mask = CLOCKSOURCE_MASK(bits);
 	cd.rate = rate;
 
 	/* calculate the mult/shift to convert counter ticks to ns. */
-	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
+	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600);
 
 	r = rate;
 	if (r >= 4000000) {
@@ -138,20 +142,14 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 		r_unit = ' ';
 
 	/* calculate how many ns until we wrap */
-	wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
-	do_div(wrap, NSEC_PER_MSEC);
-	w = wrap;
+	wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask);
+	cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
 
 	/* calculate the ns resolution of this counter */
 	res = cyc_to_ns(1ULL, cd.mult, cd.shift);
-	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
-		bits, r, r_unit, res, w);
+	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
+		bits, r, r_unit, res, wrap);
 
-	/*
-	 * Start the timer to keep sched_clock() properly updated and
-	 * sets the initial epoch.
-	 */
-	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
 	update_sched_clock();
 
 	/*
@@ -166,6 +164,12 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
+void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
+{
+	read_sched_clock_32 = read;
+	sched_clock_register(read_sched_clock_32_wrapper, bits, rate);
+}
+
 unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
 
 unsigned long long notrace sched_clock(void)
@@ -180,14 +184,22 @@ void __init sched_clock_postinit(void)
 	 * make it the final one one.
 	 */
 	if (read_sched_clock == jiffy_sched_clock_read)
-		setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
+		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
-	sched_clock_poll(sched_clock_timer.data);
+	update_sched_clock();
+
+	/*
+	 * Start the timer to keep sched_clock() properly updated and
+	 * sets the initial epoch.
+	 */
+	hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	sched_clock_timer.function = sched_clock_poll;
+	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
 static int sched_clock_suspend(void)
 {
-	sched_clock_poll(sched_clock_timer.data);
+	sched_clock_poll(&sched_clock_timer);
 	cd.suspended = true;
 	return 0;
 }
@@ -195,7 +207,6 @@ static int sched_clock_suspend(void)
 static void sched_clock_resume(void)
 {
 	cd.epoch_cyc = read_sched_clock();
-	cd.epoch_cyc_copy = cd.epoch_cyc;
 	cd.suspended = false;
 }
 
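With both halves in place, a platform timer driver can hand a wide counter to the core directly through the new sched_clock_register() interface instead of truncating it to 32 bits for setup_sched_clock(). A hypothetical driver snippet follows; the register layout, 56-bit width, and 24 MHz rate are invented for illustration and are not part of this patch.

```c
/* Hypothetical driver-side usage of sched_clock_register(); the MMIO layout,
 * 56-bit width and 24 MHz rate below are invented, not from this patch. */
#include <linux/io.h>
#include <linux/init.h>
#include <linux/sched_clock.h>

#define MY_CNT_LO	0x00	/* invented register offsets */
#define MY_CNT_HI	0x04

static void __iomem *my_cnt_base;

static u64 notrace my_cnt_read(void)
{
	u32 hi, lo;

	/* high-low-high sequence for a non-latched 64-bit counter */
	do {
		hi = readl_relaxed(my_cnt_base + MY_CNT_HI);
		lo = readl_relaxed(my_cnt_base + MY_CNT_LO);
	} while (readl_relaxed(my_cnt_base + MY_CNT_HI) != hi);

	return ((u64)hi << 32) | lo;
}

static void __init my_cnt_sched_clock_init(void)
{
	/* called from early timer init, with interrupts still disabled;
	 * 56 valid bits at 24 MHz, the core masks with CLOCKSOURCE_MASK(56) */
	sched_clock_register(my_cnt_read, 56, 24000000);
}
```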
