diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2010-07-28 15:49:22 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-07-28 15:49:22 -0400 |
commit | 47916be4e28c3d6fdb97dd8fb887d1d9b3145b9d (patch) | |
tree | 3b2259ee965cbe70c4ce9325d0e0def9bc061d97 /arch/powerpc/kernel | |
parent | 852db46d55e85b475a72e665ca08d3317769ceef (diff) | |
parent | d75d68cfef4936ddf38d2694ae2f7d1f7c45db05 (diff) |
Merge branch 'powerpc.cherry-picks' into timers/clocksource
Conflicts:
arch/powerpc/kernel/time.c
Reason: The powerpc next tree contains two commits which conflict with
the timekeeping changes:
8fd63a9e powerpc: Rework VDSO gettimeofday to prevent time going backwards
c1aa687d powerpc: Clean up obsolete code relating to decrementer and timebase
John Stultz identified them and provided the conflict resolution.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 142 | ||||
-rw-r--r-- | arch/powerpc/kernel/vdso32/gettimeofday.S | 184 | ||||
-rw-r--r-- | arch/powerpc/kernel/vdso64/gettimeofday.S | 88 |
5 files changed, 72 insertions, 345 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 496cc5b3984f..acbbac6aaa22 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -342,6 +342,7 @@ int main(void) | |||
342 | DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); | 342 | DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); |
343 | DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); | 343 | DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); |
344 | DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); | 344 | DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); |
345 | DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction)); | ||
345 | DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); | 346 | DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); |
346 | DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); | 347 | DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); |
347 | DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); | 348 | DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); |
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5c196d1086d9..8764daad309b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c | |||
@@ -288,8 +288,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
288 | max_cpus = NR_CPUS; | 288 | max_cpus = NR_CPUS; |
289 | else | 289 | else |
290 | max_cpus = 1; | 290 | max_cpus = 1; |
291 | |||
292 | smp_space_timers(max_cpus); | ||
293 | 291 | ||
294 | for_each_possible_cpu(cpu) | 292 | for_each_possible_cpu(cpu) |
295 | if (cpu != boot_cpuid) | 293 | if (cpu != boot_cpuid) |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e215f76bba1c..ce53dfa7130d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -149,16 +149,6 @@ unsigned long tb_ticks_per_usec = 100; /* sane default */ | |||
149 | EXPORT_SYMBOL(tb_ticks_per_usec); | 149 | EXPORT_SYMBOL(tb_ticks_per_usec); |
150 | unsigned long tb_ticks_per_sec; | 150 | unsigned long tb_ticks_per_sec; |
151 | EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ | 151 | EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ |
152 | u64 tb_to_xs; | ||
153 | unsigned tb_to_us; | ||
154 | |||
155 | #define TICKLEN_SCALE NTP_SCALE_SHIFT | ||
156 | static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */ | ||
157 | static u64 ticklen_to_xs; /* 0.64 fraction */ | ||
158 | |||
159 | /* If last_tick_len corresponds to about 1/HZ seconds, then | ||
160 | last_tick_len << TICKLEN_SHIFT will be about 2^63. */ | ||
161 | #define TICKLEN_SHIFT (63 - 30 - TICKLEN_SCALE + SHIFT_HZ) | ||
162 | 152 | ||
163 | DEFINE_SPINLOCK(rtc_lock); | 153 | DEFINE_SPINLOCK(rtc_lock); |
164 | EXPORT_SYMBOL_GPL(rtc_lock); | 154 | EXPORT_SYMBOL_GPL(rtc_lock); |
@@ -174,7 +164,6 @@ unsigned long ppc_proc_freq; | |||
174 | EXPORT_SYMBOL(ppc_proc_freq); | 164 | EXPORT_SYMBOL(ppc_proc_freq); |
175 | unsigned long ppc_tb_freq; | 165 | unsigned long ppc_tb_freq; |
176 | 166 | ||
177 | static u64 tb_last_jiffy __cacheline_aligned_in_smp; | ||
178 | static DEFINE_PER_CPU(u64, last_jiffy); | 167 | static DEFINE_PER_CPU(u64, last_jiffy); |
179 | 168 | ||
180 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 169 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
@@ -446,7 +435,6 @@ EXPORT_SYMBOL(profile_pc); | |||
446 | 435 | ||
447 | static int __init iSeries_tb_recal(void) | 436 | static int __init iSeries_tb_recal(void) |
448 | { | 437 | { |
449 | struct div_result divres; | ||
450 | unsigned long titan, tb; | 438 | unsigned long titan, tb; |
451 | 439 | ||
452 | /* Make sure we only run on iSeries */ | 440 | /* Make sure we only run on iSeries */ |
@@ -477,10 +465,7 @@ static int __init iSeries_tb_recal(void) | |||
477 | tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; | 465 | tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; |
478 | tb_ticks_per_sec = new_tb_ticks_per_sec; | 466 | tb_ticks_per_sec = new_tb_ticks_per_sec; |
479 | calc_cputime_factors(); | 467 | calc_cputime_factors(); |
480 | div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); | ||
481 | tb_to_xs = divres.result_low; | ||
482 | vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; | 468 | vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; |
483 | vdso_data->tb_to_xs = tb_to_xs; | ||
484 | setup_cputime_one_jiffy(); | 469 | setup_cputime_one_jiffy(); |
485 | } | 470 | } |
486 | else { | 471 | else { |
@@ -643,27 +628,9 @@ void timer_interrupt(struct pt_regs * regs) | |||
643 | trace_timer_interrupt_exit(regs); | 628 | trace_timer_interrupt_exit(regs); |
644 | } | 629 | } |
645 | 630 | ||
646 | void wakeup_decrementer(void) | ||
647 | { | ||
648 | unsigned long ticks; | ||
649 | |||
650 | /* | ||
651 | * The timebase gets saved on sleep and restored on wakeup, | ||
652 | * so all we need to do is to reset the decrementer. | ||
653 | */ | ||
654 | ticks = tb_ticks_since(__get_cpu_var(last_jiffy)); | ||
655 | if (ticks < tb_ticks_per_jiffy) | ||
656 | ticks = tb_ticks_per_jiffy - ticks; | ||
657 | else | ||
658 | ticks = 1; | ||
659 | set_dec(ticks); | ||
660 | } | ||
661 | |||
662 | #ifdef CONFIG_SUSPEND | 631 | #ifdef CONFIG_SUSPEND |
663 | void generic_suspend_disable_irqs(void) | 632 | static void generic_suspend_disable_irqs(void) |
664 | { | 633 | { |
665 | preempt_disable(); | ||
666 | |||
667 | /* Disable the decrementer, so that it doesn't interfere | 634 | /* Disable the decrementer, so that it doesn't interfere |
668 | * with suspending. | 635 | * with suspending. |
669 | */ | 636 | */ |
@@ -673,12 +640,9 @@ void generic_suspend_disable_irqs(void) | |||
673 | set_dec(0x7fffffff); | 640 | set_dec(0x7fffffff); |
674 | } | 641 | } |
675 | 642 | ||
676 | void generic_suspend_enable_irqs(void) | 643 | static void generic_suspend_enable_irqs(void) |
677 | { | 644 | { |
678 | wakeup_decrementer(); | ||
679 | |||
680 | local_irq_enable(); | 645 | local_irq_enable(); |
681 | preempt_enable(); | ||
682 | } | 646 | } |
683 | 647 | ||
684 | /* Overrides the weak version in kernel/power/main.c */ | 648 | /* Overrides the weak version in kernel/power/main.c */ |
@@ -698,23 +662,6 @@ void arch_suspend_enable_irqs(void) | |||
698 | } | 662 | } |
699 | #endif | 663 | #endif |
700 | 664 | ||
701 | #ifdef CONFIG_SMP | ||
702 | void __init smp_space_timers(unsigned int max_cpus) | ||
703 | { | ||
704 | int i; | ||
705 | u64 previous_tb = per_cpu(last_jiffy, boot_cpuid); | ||
706 | |||
707 | /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ | ||
708 | previous_tb -= tb_ticks_per_jiffy; | ||
709 | |||
710 | for_each_possible_cpu(i) { | ||
711 | if (i == boot_cpuid) | ||
712 | continue; | ||
713 | per_cpu(last_jiffy, i) = previous_tb; | ||
714 | } | ||
715 | } | ||
716 | #endif | ||
717 | |||
718 | /* | 665 | /* |
719 | * Scheduler clock - returns current time in nanosec units. | 666 | * Scheduler clock - returns current time in nanosec units. |
720 | * | 667 | * |
@@ -853,6 +800,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
853 | struct clocksource *clock, u32 mult) | 800 | struct clocksource *clock, u32 mult) |
854 | { | 801 | { |
855 | u64 new_tb_to_xs, new_stamp_xsec; | 802 | u64 new_tb_to_xs, new_stamp_xsec; |
803 | u32 frac_sec; | ||
856 | 804 | ||
857 | if (clock != &clocksource_timebase) | 805 | if (clock != &clocksource_timebase) |
858 | return; | 806 | return; |
@@ -868,6 +816,10 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
868 | do_div(new_stamp_xsec, 1000000000); | 816 | do_div(new_stamp_xsec, 1000000000); |
869 | new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; | 817 | new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; |
870 | 818 | ||
819 | BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); | ||
820 | /* this is tv_nsec / 1e9 as a 0.32 fraction */ | ||
821 | frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; | ||
822 | |||
871 | /* | 823 | /* |
872 | * tb_update_count is used to allow the userspace gettimeofday code | 824 | * tb_update_count is used to allow the userspace gettimeofday code |
873 | * to assure itself that it sees a consistent view of the tb_to_xs and | 825 | * to assure itself that it sees a consistent view of the tb_to_xs and |
@@ -885,6 +837,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
885 | vdso_data->wtom_clock_sec = wtm->tv_sec; | 837 | vdso_data->wtom_clock_sec = wtm->tv_sec; |
886 | vdso_data->wtom_clock_nsec = wtm->tv_nsec; | 838 | vdso_data->wtom_clock_nsec = wtm->tv_nsec; |
887 | vdso_data->stamp_xtime = *wall_time; | 839 | vdso_data->stamp_xtime = *wall_time; |
840 | vdso_data->stamp_sec_fraction = frac_sec; | ||
888 | smp_wmb(); | 841 | smp_wmb(); |
889 | ++(vdso_data->tb_update_count); | 842 | ++(vdso_data->tb_update_count); |
890 | } | 843 | } |
@@ -1002,15 +955,13 @@ void secondary_cpu_time_init(void) | |||
1002 | /* This function is only called on the boot processor */ | 955 | /* This function is only called on the boot processor */ |
1003 | void __init time_init(void) | 956 | void __init time_init(void) |
1004 | { | 957 | { |
1005 | unsigned long flags; | ||
1006 | struct div_result res; | 958 | struct div_result res; |
1007 | u64 scale, x; | 959 | u64 scale; |
1008 | unsigned shift; | 960 | unsigned shift; |
1009 | 961 | ||
1010 | if (__USE_RTC()) { | 962 | if (__USE_RTC()) { |
1011 | /* 601 processor: dec counts down by 128 every 128ns */ | 963 | /* 601 processor: dec counts down by 128 every 128ns */ |
1012 | ppc_tb_freq = 1000000000; | 964 | ppc_tb_freq = 1000000000; |
1013 | tb_last_jiffy = get_rtcl(); | ||
1014 | } else { | 965 | } else { |
1015 | /* Normal PowerPC with timebase register */ | 966 | /* Normal PowerPC with timebase register */ |
1016 | ppc_md.calibrate_decr(); | 967 | ppc_md.calibrate_decr(); |
@@ -1018,50 +969,15 @@ void __init time_init(void) | |||
1018 | ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); | 969 | ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); |
1019 | printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", | 970 | printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", |
1020 | ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); | 971 | ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); |
1021 | tb_last_jiffy = get_tb(); | ||
1022 | } | 972 | } |
1023 | 973 | ||
1024 | tb_ticks_per_jiffy = ppc_tb_freq / HZ; | 974 | tb_ticks_per_jiffy = ppc_tb_freq / HZ; |
1025 | tb_ticks_per_sec = ppc_tb_freq; | 975 | tb_ticks_per_sec = ppc_tb_freq; |
1026 | tb_ticks_per_usec = ppc_tb_freq / 1000000; | 976 | tb_ticks_per_usec = ppc_tb_freq / 1000000; |
1027 | tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); | ||
1028 | calc_cputime_factors(); | 977 | calc_cputime_factors(); |
1029 | setup_cputime_one_jiffy(); | 978 | setup_cputime_one_jiffy(); |
1030 | 979 | ||
1031 | /* | 980 | /* |
1032 | * Calculate the length of each tick in ns. It will not be | ||
1033 | * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ. | ||
1034 | * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq, | ||
1035 | * rounded up. | ||
1036 | */ | ||
1037 | x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1; | ||
1038 | do_div(x, ppc_tb_freq); | ||
1039 | tick_nsec = x; | ||
1040 | last_tick_len = x << TICKLEN_SCALE; | ||
1041 | |||
1042 | /* | ||
1043 | * Compute ticklen_to_xs, which is a factor which gets multiplied | ||
1044 | * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value. | ||
1045 | * It is computed as: | ||
1046 | * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9) | ||
1047 | * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT | ||
1048 | * which turns out to be N = 51 - SHIFT_HZ. | ||
1049 | * This gives the result as a 0.64 fixed-point fraction. | ||
1050 | * That value is reduced by an offset amounting to 1 xsec per | ||
1051 | * 2^31 timebase ticks to avoid problems with time going backwards | ||
1052 | * by 1 xsec when we do timer_recalc_offset due to losing the | ||
1053 | * fractional xsec. That offset is equal to ppc_tb_freq/2^51 | ||
1054 | * since there are 2^20 xsec in a second. | ||
1055 | */ | ||
1056 | div128_by_32((1ULL << 51) - ppc_tb_freq, 0, | ||
1057 | tb_ticks_per_jiffy << SHIFT_HZ, &res); | ||
1058 | div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res); | ||
1059 | ticklen_to_xs = res.result_low; | ||
1060 | |||
1061 | /* Compute tb_to_xs from tick_nsec */ | ||
1062 | tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs); | ||
1063 | |||
1064 | /* | ||
1065 | * Compute scale factor for sched_clock. | 981 | * Compute scale factor for sched_clock. |
1066 | * The calibrate_decr() function has set tb_ticks_per_sec, | 982 | * The calibrate_decr() function has set tb_ticks_per_sec, |
1067 | * which is the timebase frequency. | 983 | * which is the timebase frequency. |
@@ -1082,21 +998,14 @@ void __init time_init(void) | |||
1082 | /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ | 998 | /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ |
1083 | boot_tb = get_tb_or_rtc(); | 999 | boot_tb = get_tb_or_rtc(); |
1084 | 1000 | ||
1085 | write_seqlock_irqsave(&xtime_lock, flags); | ||
1086 | |||
1087 | /* If platform provided a timezone (pmac), we correct the time */ | 1001 | /* If platform provided a timezone (pmac), we correct the time */ |
1088 | if (timezone_offset) { | 1002 | if (timezone_offset) { |
1089 | sys_tz.tz_minuteswest = -timezone_offset / 60; | 1003 | sys_tz.tz_minuteswest = -timezone_offset / 60; |
1090 | sys_tz.tz_dsttime = 0; | 1004 | sys_tz.tz_dsttime = 0; |
1091 | } | 1005 | } |
1092 | 1006 | ||
1093 | vdso_data->tb_orig_stamp = tb_last_jiffy; | ||
1094 | vdso_data->tb_update_count = 0; | 1007 | vdso_data->tb_update_count = 0; |
1095 | vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; | 1008 | vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; |
1096 | vdso_data->stamp_xsec = (u64) get_seconds() * XSEC_PER_SEC; | ||
1097 | vdso_data->tb_to_xs = tb_to_xs; | ||
1098 | |||
1099 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
1100 | 1009 | ||
1101 | /* Start the decrementer on CPUs that have manual control | 1010 | /* Start the decrementer on CPUs that have manual control |
1102 | * such as BookE | 1011 | * such as BookE |
@@ -1190,39 +1099,6 @@ void to_tm(int tim, struct rtc_time * tm) | |||
1190 | GregorianDay(tm); | 1099 | GregorianDay(tm); |
1191 | } | 1100 | } |
1192 | 1101 | ||
1193 | /* Auxiliary function to compute scaling factors */ | ||
1194 | /* Actually the choice of a timebase running at 1/4 the of the bus | ||
1195 | * frequency giving resolution of a few tens of nanoseconds is quite nice. | ||
1196 | * It makes this computation very precise (27-28 bits typically) which | ||
1197 | * is optimistic considering the stability of most processor clock | ||
1198 | * oscillators and the precision with which the timebase frequency | ||
1199 | * is measured but does not harm. | ||
1200 | */ | ||
1201 | unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) | ||
1202 | { | ||
1203 | unsigned mlt=0, tmp, err; | ||
1204 | /* No concern for performance, it's done once: use a stupid | ||
1205 | * but safe and compact method to find the multiplier. | ||
1206 | */ | ||
1207 | |||
1208 | for (tmp = 1U<<31; tmp != 0; tmp >>= 1) { | ||
1209 | if (mulhwu(inscale, mlt|tmp) < outscale) | ||
1210 | mlt |= tmp; | ||
1211 | } | ||
1212 | |||
1213 | /* We might still be off by 1 for the best approximation. | ||
1214 | * A side effect of this is that if outscale is too large | ||
1215 | * the returned value will be zero. | ||
1216 | * Many corner cases have been checked and seem to work, | ||
1217 | * some might have been forgotten in the test however. | ||
1218 | */ | ||
1219 | |||
1220 | err = inscale * (mlt+1); | ||
1221 | if (err <= inscale/2) | ||
1222 | mlt++; | ||
1223 | return mlt; | ||
1224 | } | ||
1225 | |||
1226 | /* | 1102 | /* |
1227 | * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit | 1103 | * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit |
1228 | * result. | 1104 | * result. |
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index ee038d4bf252..4ee09ee2e836 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S | |||
@@ -19,8 +19,10 @@ | |||
19 | /* Offset for the low 32-bit part of a field of long type */ | 19 | /* Offset for the low 32-bit part of a field of long type */ |
20 | #ifdef CONFIG_PPC64 | 20 | #ifdef CONFIG_PPC64 |
21 | #define LOPART 4 | 21 | #define LOPART 4 |
22 | #define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART | ||
22 | #else | 23 | #else |
23 | #define LOPART 0 | 24 | #define LOPART 0 |
25 | #define TSPEC_TV_SEC TSPC32_TV_SEC | ||
24 | #endif | 26 | #endif |
25 | 27 | ||
26 | .text | 28 | .text |
@@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) | |||
41 | mr r9, r3 /* datapage ptr in r9 */ | 43 | mr r9, r3 /* datapage ptr in r9 */ |
42 | cmplwi r10,0 /* check if tv is NULL */ | 44 | cmplwi r10,0 /* check if tv is NULL */ |
43 | beq 3f | 45 | beq 3f |
44 | bl __do_get_xsec@local /* get xsec from tb & kernel */ | 46 | lis r7,1000000@ha /* load up USEC_PER_SEC */ |
45 | bne- 2f /* out of line -> do syscall */ | 47 | addi r7,r7,1000000@l /* so we get microseconds in r4 */ |
46 | 48 | bl __do_get_tspec@local /* get sec/usec from tb & kernel */ | |
47 | /* seconds are xsec >> 20 */ | 49 | stw r3,TVAL32_TV_SEC(r10) |
48 | rlwinm r5,r4,12,20,31 | 50 | stw r4,TVAL32_TV_USEC(r10) |
49 | rlwimi r5,r3,12,0,19 | ||
50 | stw r5,TVAL32_TV_SEC(r10) | ||
51 | |||
52 | /* get remaining xsec and convert to usec. we scale | ||
53 | * up remaining xsec by 12 bits and get the top 32 bits | ||
54 | * of the multiplication | ||
55 | */ | ||
56 | rlwinm r5,r4,12,0,19 | ||
57 | lis r6,1000000@h | ||
58 | ori r6,r6,1000000@l | ||
59 | mulhwu r5,r5,r6 | ||
60 | stw r5,TVAL32_TV_USEC(r10) | ||
61 | 51 | ||
62 | 3: cmplwi r11,0 /* check if tz is NULL */ | 52 | 3: cmplwi r11,0 /* check if tz is NULL */ |
63 | beq 1f | 53 | beq 1f |
@@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) | |||
70 | crclr cr0*4+so | 60 | crclr cr0*4+so |
71 | li r3,0 | 61 | li r3,0 |
72 | blr | 62 | blr |
73 | |||
74 | 2: | ||
75 | mtlr r12 | ||
76 | mr r3,r10 | ||
77 | mr r4,r11 | ||
78 | li r0,__NR_gettimeofday | ||
79 | sc | ||
80 | blr | ||
81 | .cfi_endproc | 63 | .cfi_endproc |
82 | V_FUNCTION_END(__kernel_gettimeofday) | 64 | V_FUNCTION_END(__kernel_gettimeofday) |
83 | 65 | ||
@@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) | |||
100 | mr r11,r4 /* r11 saves tp */ | 82 | mr r11,r4 /* r11 saves tp */ |
101 | bl __get_datapage@local /* get data page */ | 83 | bl __get_datapage@local /* get data page */ |
102 | mr r9,r3 /* datapage ptr in r9 */ | 84 | mr r9,r3 /* datapage ptr in r9 */ |
103 | 85 | lis r7,NSEC_PER_SEC@h /* want nanoseconds */ | |
86 | ori r7,r7,NSEC_PER_SEC@l | ||
104 | 50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ | 87 | 50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ |
105 | bne cr1,80f /* not monotonic -> all done */ | 88 | bne cr1,80f /* not monotonic -> all done */ |
106 | 89 | ||
@@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres) | |||
198 | 181 | ||
199 | 182 | ||
200 | /* | 183 | /* |
201 | * This is the core of gettimeofday() & friends, it returns the xsec | 184 | * This is the core of clock_gettime() and gettimeofday(), |
202 | * value in r3 & r4 and expects the datapage ptr (non clobbered) | 185 | * it returns the current time in r3 (seconds) and r4. |
203 | * in r9. clobbers r0,r4,r5,r6,r7,r8. | 186 | * On entry, r7 gives the resolution of r4, either USEC_PER_SEC |
204 | * When returning, r8 contains the counter value that can be reused | 187 | * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. |
205 | * by the monotonic clock implementation | ||
206 | */ | ||
207 | __do_get_xsec: | ||
208 | .cfi_startproc | ||
209 | /* Check for update count & load values. We use the low | ||
210 | * order 32 bits of the update count | ||
211 | */ | ||
212 | 1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) | ||
213 | andi. r0,r8,1 /* pending update ? loop */ | ||
214 | bne- 1b | ||
215 | xor r0,r8,r8 /* create dependency */ | ||
216 | add r9,r9,r0 | ||
217 | |||
218 | /* Load orig stamp (offset to TB) */ | ||
219 | lwz r5,CFG_TB_ORIG_STAMP(r9) | ||
220 | lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) | ||
221 | |||
222 | /* Get a stable TB value */ | ||
223 | 2: mftbu r3 | ||
224 | mftbl r4 | ||
225 | mftbu r0 | ||
226 | cmpl cr0,r3,r0 | ||
227 | bne- 2b | ||
228 | |||
229 | /* Substract tb orig stamp. If the high part is non-zero, we jump to | ||
230 | * the slow path which call the syscall. | ||
231 | * If it's ok, then we have our 32 bits tb_ticks value in r7 | ||
232 | */ | ||
233 | subfc r7,r6,r4 | ||
234 | subfe. r0,r5,r3 | ||
235 | bne- 3f | ||
236 | |||
237 | /* Load scale factor & do multiplication */ | ||
238 | lwz r5,CFG_TB_TO_XS(r9) /* load values */ | ||
239 | lwz r6,(CFG_TB_TO_XS+4)(r9) | ||
240 | mulhwu r4,r7,r5 | ||
241 | mulhwu r6,r7,r6 | ||
242 | mullw r0,r7,r5 | ||
243 | addc r6,r6,r0 | ||
244 | |||
245 | /* At this point, we have the scaled xsec value in r4 + XER:CA | ||
246 | * we load & add the stamp since epoch | ||
247 | */ | ||
248 | lwz r5,CFG_STAMP_XSEC(r9) | ||
249 | lwz r6,(CFG_STAMP_XSEC+4)(r9) | ||
250 | adde r4,r4,r6 | ||
251 | addze r3,r5 | ||
252 | |||
253 | /* We now have our result in r3,r4. We create a fake dependency | ||
254 | * on that result and re-check the counter | ||
255 | */ | ||
256 | or r6,r4,r3 | ||
257 | xor r0,r6,r6 | ||
258 | add r9,r9,r0 | ||
259 | lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) | ||
260 | cmpl cr0,r8,r0 /* check if updated */ | ||
261 | bne- 1b | ||
262 | |||
263 | /* Warning ! The caller expects CR:EQ to be set to indicate a | ||
264 | * successful calculation (so it won't fallback to the syscall | ||
265 | * method). We have overriden that CR bit in the counter check, | ||
266 | * but fortunately, the loop exit condition _is_ CR:EQ set, so | ||
267 | * we can exit safely here. If you change this code, be careful | ||
268 | * of that side effect. | ||
269 | */ | ||
270 | 3: blr | ||
271 | .cfi_endproc | ||
272 | |||
273 | /* | ||
274 | * This is the core of clock_gettime(), it returns the current | ||
275 | * time in seconds and nanoseconds in r3 and r4. | ||
276 | * It expects the datapage ptr in r9 and doesn't clobber it. | 188 | * It expects the datapage ptr in r9 and doesn't clobber it. |
277 | * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7. | 189 | * It clobbers r0, r5 and r6. |
278 | * On return, r8 contains the counter value that can be reused. | 190 | * On return, r8 contains the counter value that can be reused. |
279 | * This clobbers cr0 but not any other cr field. | 191 | * This clobbers cr0 but not any other cr field. |
280 | */ | 192 | */ |
@@ -297,70 +209,58 @@ __do_get_tspec: | |||
297 | 2: mftbu r3 | 209 | 2: mftbu r3 |
298 | mftbl r4 | 210 | mftbl r4 |
299 | mftbu r0 | 211 | mftbu r0 |
300 | cmpl cr0,r3,r0 | 212 | cmplw cr0,r3,r0 |
301 | bne- 2b | 213 | bne- 2b |
302 | 214 | ||
303 | /* Subtract tb orig stamp and shift left 12 bits. | 215 | /* Subtract tb orig stamp and shift left 12 bits. |
304 | */ | 216 | */ |
305 | subfc r7,r6,r4 | 217 | subfc r4,r6,r4 |
306 | subfe r0,r5,r3 | 218 | subfe r0,r5,r3 |
307 | slwi r0,r0,12 | 219 | slwi r0,r0,12 |
308 | rlwimi. r0,r7,12,20,31 | 220 | rlwimi. r0,r4,12,20,31 |
309 | slwi r7,r7,12 | 221 | slwi r4,r4,12 |
310 | 222 | ||
311 | /* Load scale factor & do multiplication */ | 223 | /* |
224 | * Load scale factor & do multiplication. | ||
225 | * We only use the high 32 bits of the tb_to_xs value. | ||
226 | * Even with a 1GHz timebase clock, the high 32 bits of | ||
227 | * tb_to_xs will be at least 4 million, so the error from | ||
228 | * ignoring the low 32 bits will be no more than 0.25ppm. | ||
229 | * The error will just make the clock run very very slightly | ||
230 | * slow until the next time the kernel updates the VDSO data, | ||
231 | * at which point the clock will catch up to the kernel's value, | ||
232 | * so there is no long-term error accumulation. | ||
233 | */ | ||
312 | lwz r5,CFG_TB_TO_XS(r9) /* load values */ | 234 | lwz r5,CFG_TB_TO_XS(r9) /* load values */ |
313 | lwz r6,(CFG_TB_TO_XS+4)(r9) | 235 | mulhwu r4,r4,r5 |
314 | mulhwu r3,r7,r6 | ||
315 | mullw r10,r7,r5 | ||
316 | mulhwu r4,r7,r5 | ||
317 | addc r10,r3,r10 | ||
318 | li r3,0 | 236 | li r3,0 |
319 | 237 | ||
320 | beq+ 4f /* skip high part computation if 0 */ | 238 | beq+ 4f /* skip high part computation if 0 */ |
321 | mulhwu r3,r0,r5 | 239 | mulhwu r3,r0,r5 |
322 | mullw r7,r0,r5 | 240 | mullw r5,r0,r5 |
323 | mulhwu r5,r0,r6 | ||
324 | mullw r6,r0,r6 | ||
325 | adde r4,r4,r7 | ||
326 | addze r3,r3 | ||
327 | addc r4,r4,r5 | 241 | addc r4,r4,r5 |
328 | addze r3,r3 | 242 | addze r3,r3 |
329 | addc r10,r10,r6 | 243 | 4: |
330 | 244 | /* At this point, we have seconds since the xtime stamp | |
331 | 4: addze r4,r4 /* add in carry */ | 245 | * as a 32.32 fixed-point number in r3 and r4. |
332 | lis r7,NSEC_PER_SEC@h | 246 | * Load & add the xtime stamp. |
333 | ori r7,r7,NSEC_PER_SEC@l | ||
334 | mulhwu r4,r4,r7 /* convert to nanoseconds */ | ||
335 | |||
336 | /* At this point, we have seconds & nanoseconds since the xtime | ||
337 | * stamp in r3+CA and r4. Load & add the xtime stamp. | ||
338 | */ | 247 | */ |
339 | #ifdef CONFIG_PPC64 | 248 | lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9) |
340 | lwz r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9) | 249 | lwz r6,STAMP_SEC_FRAC(r9) |
341 | lwz r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9) | 250 | addc r4,r4,r6 |
342 | #else | ||
343 | lwz r5,STAMP_XTIME+TSPC32_TV_SEC(r9) | ||
344 | lwz r6,STAMP_XTIME+TSPC32_TV_NSEC(r9) | ||
345 | #endif | ||
346 | add r4,r4,r6 | ||
347 | adde r3,r3,r5 | 251 | adde r3,r3,r5 |
348 | 252 | ||
349 | /* We now have our result in r3,r4. We create a fake dependency | 253 | /* We create a fake dependency on the result in r3/r4 |
350 | * on that result and re-check the counter | 254 | * and re-check the counter |
351 | */ | 255 | */ |
352 | or r6,r4,r3 | 256 | or r6,r4,r3 |
353 | xor r0,r6,r6 | 257 | xor r0,r6,r6 |
354 | add r9,r9,r0 | 258 | add r9,r9,r0 |
355 | lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) | 259 | lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) |
356 | cmpl cr0,r8,r0 /* check if updated */ | 260 | cmplw cr0,r8,r0 /* check if updated */ |
357 | bne- 1b | 261 | bne- 1b |
358 | 262 | ||
359 | /* check for nanosecond overflow and adjust if necessary */ | 263 | mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ |
360 | cmpw r4,r7 | ||
361 | bltlr /* all done if no overflow */ | ||
362 | subf r4,r7,r4 /* adjust if overflow */ | ||
363 | addi r3,r3,1 | ||
364 | 264 | ||
365 | blr | 265 | blr |
366 | .cfi_endproc | 266 | .cfi_endproc |
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 262cd5857a56..e97a9a0dc4ac 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S | |||
@@ -33,18 +33,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) | |||
33 | bl V_LOCAL_FUNC(__get_datapage) /* get data page */ | 33 | bl V_LOCAL_FUNC(__get_datapage) /* get data page */ |
34 | cmpldi r11,0 /* check if tv is NULL */ | 34 | cmpldi r11,0 /* check if tv is NULL */ |
35 | beq 2f | 35 | beq 2f |
36 | bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ | 36 | lis r7,1000000@ha /* load up USEC_PER_SEC */ |
37 | lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ | 37 | addi r7,r7,1000000@l |
38 | ori r7,r7,16960 | 38 | bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ |
39 | rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ | 39 | std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ |
40 | rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ | 40 | std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ |
41 | std r5,TVAL64_TV_SEC(r11) /* store sec in tv */ | ||
42 | subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ | ||
43 | mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / | ||
44 | * XSEC_PER_SEC | ||
45 | */ | ||
46 | rldicl r0,r0,44,20 | ||
47 | std r0,TVAL64_TV_USEC(r11) /* store usec in tv */ | ||
48 | 2: cmpldi r10,0 /* check if tz is NULL */ | 41 | 2: cmpldi r10,0 /* check if tz is NULL */ |
49 | beq 1f | 42 | beq 1f |
50 | lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ | 43 | lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ |
@@ -77,6 +70,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) | |||
77 | .cfi_register lr,r12 | 70 | .cfi_register lr,r12 |
78 | mr r11,r4 /* r11 saves tp */ | 71 | mr r11,r4 /* r11 saves tp */ |
79 | bl V_LOCAL_FUNC(__get_datapage) /* get data page */ | 72 | bl V_LOCAL_FUNC(__get_datapage) /* get data page */ |
73 | lis r7,NSEC_PER_SEC@h /* want nanoseconds */ | ||
74 | ori r7,r7,NSEC_PER_SEC@l | ||
80 | 50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ | 75 | 50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ |
81 | bne cr1,80f /* if not monotonic, all done */ | 76 | bne cr1,80f /* if not monotonic, all done */ |
82 | 77 | ||
@@ -171,49 +166,12 @@ V_FUNCTION_END(__kernel_clock_getres) | |||
171 | 166 | ||
172 | 167 | ||
173 | /* | 168 | /* |
174 | * This is the core of gettimeofday(), it returns the xsec | 169 | * This is the core of clock_gettime() and gettimeofday(), |
175 | * value in r4 and expects the datapage ptr (non clobbered) | 170 | * it returns the current time in r4 (seconds) and r5. |
176 | * in r3. clobbers r0,r4,r5,r6,r7,r8 | 171 | * On entry, r7 gives the resolution of r5, either USEC_PER_SEC |
177 | * When returning, r8 contains the counter value that can be reused | 172 | * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. |
178 | */ | ||
179 | V_FUNCTION_BEGIN(__do_get_xsec) | ||
180 | .cfi_startproc | ||
181 | /* check for update count & load values */ | ||
182 | 1: ld r8,CFG_TB_UPDATE_COUNT(r3) | ||
183 | andi. r0,r8,1 /* pending update ? loop */ | ||
184 | bne- 1b | ||
185 | xor r0,r8,r8 /* create dependency */ | ||
186 | add r3,r3,r0 | ||
187 | |||
188 | /* Get TB & offset it. We use the MFTB macro which will generate | ||
189 | * workaround code for Cell. | ||
190 | */ | ||
191 | MFTB(r7) | ||
192 | ld r9,CFG_TB_ORIG_STAMP(r3) | ||
193 | subf r7,r9,r7 | ||
194 | |||
195 | /* Scale result */ | ||
196 | ld r5,CFG_TB_TO_XS(r3) | ||
197 | mulhdu r7,r7,r5 | ||
198 | |||
199 | /* Add stamp since epoch */ | ||
200 | ld r6,CFG_STAMP_XSEC(r3) | ||
201 | add r4,r6,r7 | ||
202 | |||
203 | xor r0,r4,r4 | ||
204 | add r3,r3,r0 | ||
205 | ld r0,CFG_TB_UPDATE_COUNT(r3) | ||
206 | cmpld cr0,r0,r8 /* check if updated */ | ||
207 | bne- 1b | ||
208 | blr | ||
209 | .cfi_endproc | ||
210 | V_FUNCTION_END(__do_get_xsec) | ||
211 | |||
212 | /* | ||
213 | * This is the core of clock_gettime(), it returns the current | ||
214 | * time in seconds and nanoseconds in r4 and r5. | ||
215 | * It expects the datapage ptr in r3 and doesn't clobber it. | 173 | * It expects the datapage ptr in r3 and doesn't clobber it. |
216 | * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7. | 174 | * It clobbers r0, r6 and r9. |
217 | * On return, r8 contains the counter value that can be reused. | 175 | * On return, r8 contains the counter value that can be reused. |
218 | * This clobbers cr0 but not any other cr field. | 176 | * This clobbers cr0 but not any other cr field. |
219 | */ | 177 | */ |
@@ -229,18 +187,18 @@ V_FUNCTION_BEGIN(__do_get_tspec) | |||
229 | /* Get TB & offset it. We use the MFTB macro which will generate | 187 | /* Get TB & offset it. We use the MFTB macro which will generate |
230 | * workaround code for Cell. | 188 | * workaround code for Cell. |
231 | */ | 189 | */ |
232 | MFTB(r7) | 190 | MFTB(r6) |
233 | ld r9,CFG_TB_ORIG_STAMP(r3) | 191 | ld r9,CFG_TB_ORIG_STAMP(r3) |
234 | subf r7,r9,r7 | 192 | subf r6,r9,r6 |
235 | 193 | ||
236 | /* Scale result */ | 194 | /* Scale result */ |
237 | ld r5,CFG_TB_TO_XS(r3) | 195 | ld r5,CFG_TB_TO_XS(r3) |
238 | sldi r7,r7,12 /* compute time since stamp_xtime */ | 196 | sldi r6,r6,12 /* compute time since stamp_xtime */ |
239 | mulhdu r6,r7,r5 /* in units of 2^-32 seconds */ | 197 | mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ |
240 | 198 | ||
241 | /* Add stamp since epoch */ | 199 | /* Add stamp since epoch */ |
242 | ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) | 200 | ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3) |
243 | ld r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) | 201 | lwz r5,STAMP_SEC_FRAC(r3) |
244 | or r0,r4,r5 | 202 | or r0,r4,r5 |
245 | or r0,r0,r6 | 203 | or r0,r0,r6 |
246 | xor r0,r0,r0 | 204 | xor r0,r0,r0 |
@@ -250,17 +208,11 @@ V_FUNCTION_BEGIN(__do_get_tspec) | |||
250 | bne- 1b /* reload if so */ | 208 | bne- 1b /* reload if so */ |
251 | 209 | ||
252 | /* convert to seconds & nanoseconds and add to stamp */ | 210 | /* convert to seconds & nanoseconds and add to stamp */ |
253 | lis r7,NSEC_PER_SEC@h | 211 | add r6,r6,r5 /* add on fractional seconds of xtime */ |
254 | ori r7,r7,NSEC_PER_SEC@l | 212 | mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ |
255 | mulhwu r0,r6,r7 /* compute nanoseconds and */ | ||
256 | srdi r6,r6,32 /* seconds since stamp_xtime */ | 213 | srdi r6,r6,32 /* seconds since stamp_xtime */ |
257 | clrldi r0,r0,32 | 214 | clrldi r5,r5,32 |
258 | add r5,r5,r0 /* add nanoseconds together */ | ||
259 | cmpd r5,r7 /* overflow? */ | ||
260 | add r4,r4,r6 | 215 | add r4,r4,r6 |
261 | bltlr /* all done if no overflow */ | ||
262 | subf r5,r7,r5 /* if overflow, adjust */ | ||
263 | addi r4,r4,1 | ||
264 | blr | 216 | blr |
265 | .cfi_endproc | 217 | .cfi_endproc |
266 | V_FUNCTION_END(__do_get_tspec) | 218 | V_FUNCTION_END(__do_get_tspec) |