author	Paul Mackerras <paulus@samba.org>	2010-06-20 15:03:08 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2010-07-08 21:26:16 -0400
commit	8fd63a9ea7528463211a6c88d500c51851d960c8 (patch)
tree	a24f11824e6c31ebd632ff5bcfb27a6e45713f7c /arch/powerpc/kernel/vdso32
parent	5f07aa7524e98d6f68f2bec54f155ef6012e2c9a (diff)
powerpc: Rework VDSO gettimeofday to prevent time going backwards
Currently it is possible for userspace to see the result of
gettimeofday() going backwards by 1 microsecond, assuming that
userspace is using the gettimeofday() in the VDSO.  The VDSO
gettimeofday() algorithm computes the time in "xsecs", which are
units of 2^-20 seconds, or approximately 0.954 microseconds,
using the algorithm

	now = (timebase - tb_orig_stamp) * tb_to_xs + stamp_xsec

and then converts the time in xsecs to seconds and microseconds.

The kernel updates the tb_orig_stamp and stamp_xsec values every
tick in update_vsyscall().  If the length of the tick is not an
integer number of xsecs, then some precision is lost in converting
the current time to xsecs.  For example, with CONFIG_HZ=1000, the
tick is 1ms long, which is 1048.576 xsecs.  That means that
stamp_xsec will advance by either 1048 or 1049 on each tick.  With
the right conditions, it is possible for userspace to get
(timebase - tb_orig_stamp) * tb_to_xs being 1049 if the kernel is
slightly late in updating the vdso_datapage, and then for stamp_xsec
to advance by 1048 when the kernel does update it, and for userspace
to then see (timebase - tb_orig_stamp) * tb_to_xs being zero due to
integer truncation.  The result is that time appears to go backwards
by 1 microsecond.

To fix this we change the VDSO gettimeofday to use a new field in the
VDSO datapage which stores the nanoseconds part of the time as a
fractional number of seconds in a 0.32 binary fraction format.  (Or
put another way, as a 32-bit number in units of 0.23283 ns.)  This is
convenient because we can use the mulhwu instruction to convert it to
either microseconds or nanoseconds.

Since it turns out that computing the time of day using this new
field is simpler than either using stamp_xsec (as gettimeofday does)
or stamp_xtime.tv_nsec (as clock_gettime does), this converts both
gettimeofday and clock_gettime to use the new field.  The existing
__do_get_tspec function is converted to use the new field and take a
parameter in r7 that indicates the desired resolution, 1,000,000 for
microseconds or 1,000,000,000 for nanoseconds.  The __do_get_xsec
function is then unused and is deleted.

The new algorithm is

	now = ((timebase - tb_orig_stamp) << 12) * tb_to_xs
		+ (stamp_xtime_seconds << 32) + stamp_sec_fraction

with 'now' in units of 2^-32 seconds.  That is then converted to
seconds and either microseconds or nanoseconds with

	seconds = now >> 32
	partseconds = ((now & 0xffffffff) * resolution) >> 32

The 32-bit VDSO code also makes a further simplification: it ignores
the bottom 32 bits of the tb_to_xs value, which is a 0.64 format
binary fraction.  Doing so gets rid of 4 multiply instructions.
Assuming a timebase frequency of 1GHz or less and an update interval
of no more than 10ms, the upper 32 bits of tb_to_xs will be at least
4503599, so the error from ignoring the low 32 bits will be at most
2.2ns, which is more than an order of magnitude less than the time
taken to do gettimeofday or clock_gettime on our fastest processors,
so there is no possibility of seeing inconsistent values due to this.

This also moves update_gtod() down next to its only caller, and makes
update_vsyscall use the time passed in via the wall_time argument
rather than accessing xtime directly.  At present, wall_time always
points to xtime, but that could change in future.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
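As a rough illustration of the arithmetic described above, here is a minimal C sketch of the new 32.32 fixed-point conversion.  The struct, field names and 64-bit C types are stand-ins for the VDSO datapage values named in the message; the real code does this with 32-bit multiply-high (mulhwu) instructions in assembly, as shown in the diff below.

#include <stdint.h>

/* Illustrative snapshot of the VDSO datapage fields used below
 * (not the kernel's actual structure layout). */
struct vdso_snapshot {
	uint64_t tb_orig_stamp;		/* timebase value at the last update */
	uint64_t tb_to_xs;		/* xsecs per tb tick, 0.64 binary fraction */
	uint32_t stamp_xtime_sec;	/* seconds at the last update */
	uint32_t stamp_sec_fraction;	/* fractional second, 0.32 binary fraction */
};

/*
 * Convert a raw timebase reading to (seconds, fraction), where the
 * fraction is scaled by 'resolution' (1000000 for microseconds,
 * 1000000000 for nanoseconds).  Assumes the interval since the last
 * update is short (a few ms), so the shifted tick count fits in 64 bits.
 */
void tb_to_time(const struct vdso_snapshot *d, uint64_t timebase,
		uint32_t resolution, uint32_t *sec, uint32_t *frac)
{
	/* Elapsed time since the last update, in units of 2^-32 s.
	 * Like the 32-bit VDSO, only the top 32 bits of tb_to_xs are used. */
	uint64_t elapsed = (((timebase - d->tb_orig_stamp) << 12)
			    * (d->tb_to_xs >> 32)) >> 32;

	/* 32.32 fixed-point time of day: seconds in the high word,
	 * fractional second in the low word. */
	uint64_t now = elapsed + ((uint64_t)d->stamp_xtime_sec << 32)
			       + d->stamp_sec_fraction;

	*sec  = (uint32_t)(now >> 32);
	*frac = (uint32_t)(((now & 0xffffffffULL) * resolution) >> 32);
}

With resolution = 1000000 this corresponds to the tv_usec value that the patched __do_get_tspec produces with mulhwu; with resolution = 1000000000 it corresponds to tv_nsec.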
Diffstat (limited to 'arch/powerpc/kernel/vdso32')
-rw-r--r--	arch/powerpc/kernel/vdso32/gettimeofday.S	184
1 file changed, 42 insertions(+), 142 deletions(-)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index ee038d4bf252..4ee09ee2e836 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -19,8 +19,10 @@
 /* Offset for the low 32-bit part of a field of long type */
 #ifdef CONFIG_PPC64
 #define LOPART	4
+#define TSPEC_TV_SEC	TSPC64_TV_SEC+LOPART
 #else
 #define LOPART	0
+#define TSPEC_TV_SEC	TSPC32_TV_SEC
 #endif
 
 	.text
@@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 	mr	r9, r3			/* datapage ptr in r9 */
 	cmplwi	r10,0			/* check if tv is NULL */
 	beq	3f
-	bl	__do_get_xsec@local	/* get xsec from tb & kernel */
-	bne-	2f			/* out of line -> do syscall */
-
-	/* seconds are xsec >> 20 */
-	rlwinm	r5,r4,12,20,31
-	rlwimi	r5,r3,12,0,19
-	stw	r5,TVAL32_TV_SEC(r10)
-
-	/* get remaining xsec and convert to usec. we scale
-	 * up remaining xsec by 12 bits and get the top 32 bits
-	 * of the multiplication
-	 */
-	rlwinm	r5,r4,12,0,19
-	lis	r6,1000000@h
-	ori	r6,r6,1000000@l
-	mulhwu	r5,r5,r6
-	stw	r5,TVAL32_TV_USEC(r10)
+	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
+	addi	r7,r7,1000000@l		/* so we get microseconds in r4 */
+	bl	__do_get_tspec@local	/* get sec/usec from tb & kernel */
+	stw	r3,TVAL32_TV_SEC(r10)
+	stw	r4,TVAL32_TV_USEC(r10)
 
 3:	cmplwi	r11,0			/* check if tz is NULL */
 	beq	1f
@@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 	crclr	cr0*4+so
 	li	r3,0
 	blr
-
-2:
-	mtlr	r12
-	mr	r3,r10
-	mr	r4,r11
-	li	r0,__NR_gettimeofday
-	sc
-	blr
 	.cfi_endproc
 V_FUNCTION_END(__kernel_gettimeofday)
 
@@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	mr	r11,r4			/* r11 saves tp */
 	bl	__get_datapage@local	/* get data page */
 	mr	r9,r3			/* datapage ptr in r9 */
-
+	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
+	ori	r7,r7,NSEC_PER_SEC@l
 50:	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */
 	bne	cr1,80f			/* not monotonic -> all done */
 
@@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres)
 
 
 /*
- * This is the core of gettimeofday() & friends, it returns the xsec
- * value in r3 & r4 and expects the datapage ptr (non clobbered)
- * in r9. clobbers r0,r4,r5,r6,r7,r8.
- * When returning, r8 contains the counter value that can be reused
- * by the monotonic clock implementation
- */
-__do_get_xsec:
-	.cfi_startproc
-	/* Check for update count & load values. We use the low
-	 * order 32 bits of the update count
-	 */
-1:	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
-	andi.	r0,r8,1			/* pending update ? loop */
-	bne-	1b
-	xor	r0,r8,r8		/* create dependency */
-	add	r9,r9,r0
-
-	/* Load orig stamp (offset to TB) */
-	lwz	r5,CFG_TB_ORIG_STAMP(r9)
-	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
-	/* Get a stable TB value */
-2:	mftbu	r3
-	mftbl	r4
-	mftbu	r0
-	cmpl	cr0,r3,r0
-	bne-	2b
-
-	/* Substract tb orig stamp. If the high part is non-zero, we jump to
-	 * the slow path which call the syscall.
-	 * If it's ok, then we have our 32 bits tb_ticks value in r7
-	 */
-	subfc	r7,r6,r4
-	subfe.	r0,r5,r3
-	bne-	3f
-
-	/* Load scale factor & do multiplication */
-	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */
-	lwz	r6,(CFG_TB_TO_XS+4)(r9)
-	mulhwu	r4,r7,r5
-	mulhwu	r6,r7,r6
-	mullw	r0,r7,r5
-	addc	r6,r6,r0
-
-	/* At this point, we have the scaled xsec value in r4 + XER:CA
-	 * we load & add the stamp since epoch
-	 */
-	lwz	r5,CFG_STAMP_XSEC(r9)
-	lwz	r6,(CFG_STAMP_XSEC+4)(r9)
-	adde	r4,r4,r6
-	addze	r3,r5
-
-	/* We now have our result in r3,r4. We create a fake dependency
-	 * on that result and re-check the counter
-	 */
-	or	r6,r4,r3
-	xor	r0,r6,r6
-	add	r9,r9,r0
-	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
-	cmpl	cr0,r8,r0		/* check if updated */
-	bne-	1b
-
-	/* Warning ! The caller expects CR:EQ to be set to indicate a
-	 * successful calculation (so it won't fallback to the syscall
-	 * method). We have overriden that CR bit in the counter check,
-	 * but fortunately, the loop exit condition _is_ CR:EQ set, so
-	 * we can exit safely here. If you change this code, be careful
-	 * of that side effect.
-	 */
-3:	blr
-	.cfi_endproc
-
-/*
- * This is the core of clock_gettime(), it returns the current
- * time in seconds and nanoseconds in r3 and r4.
+ * This is the core of clock_gettime() and gettimeofday(),
+ * it returns the current time in r3 (seconds) and r4.
+ * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
+ * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
  * It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7.
+ * It clobbers r0, r5 and r6.
  * On return, r8 contains the counter value that can be reused.
  * This clobbers cr0 but not any other cr field.
  */
@@ -297,70 +209,58 @@ __do_get_tspec:
 2:	mftbu	r3
 	mftbl	r4
 	mftbu	r0
-	cmpl	cr0,r3,r0
+	cmplw	cr0,r3,r0
 	bne-	2b
 
 	/* Subtract tb orig stamp and shift left 12 bits.
 	 */
-	subfc	r7,r6,r4
+	subfc	r4,r6,r4
 	subfe	r0,r5,r3
 	slwi	r0,r0,12
-	rlwimi.	r0,r7,12,20,31
-	slwi	r7,r7,12
+	rlwimi.	r0,r4,12,20,31
+	slwi	r4,r4,12
 
-	/* Load scale factor & do multiplication */
+	/*
+	 * Load scale factor & do multiplication.
+	 * We only use the high 32 bits of the tb_to_xs value.
+	 * Even with a 1GHz timebase clock, the high 32 bits of
+	 * tb_to_xs will be at least 4 million, so the error from
+	 * ignoring the low 32 bits will be no more than 0.25ppm.
+	 * The error will just make the clock run very very slightly
+	 * slow until the next time the kernel updates the VDSO data,
+	 * at which point the clock will catch up to the kernel's value,
+	 * so there is no long-term error accumulation.
+	 */
 	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */
-	lwz	r6,(CFG_TB_TO_XS+4)(r9)
-	mulhwu	r3,r7,r6
-	mullw	r10,r7,r5
-	mulhwu	r4,r7,r5
-	addc	r10,r3,r10
+	mulhwu	r4,r4,r5
 	li	r3,0
 
 	beq+	4f			/* skip high part computation if 0 */
 	mulhwu	r3,r0,r5
-	mullw	r7,r0,r5
-	mulhwu	r5,r0,r6
-	mullw	r6,r0,r6
-	adde	r4,r4,r7
-	addze	r3,r3
+	mullw	r5,r0,r5
 	addc	r4,r4,r5
 	addze	r3,r3
-	addc	r10,r10,r6
-
-4:	addze	r4,r4			/* add in carry */
-	lis	r7,NSEC_PER_SEC@h
-	ori	r7,r7,NSEC_PER_SEC@l
-	mulhwu	r4,r4,r7		/* convert to nanoseconds */
-
-	/* At this point, we have seconds & nanoseconds since the xtime
-	 * stamp in r3+CA and r4. Load & add the xtime stamp.
+4:
+	/* At this point, we have seconds since the xtime stamp
+	 * as a 32.32 fixed-point number in r3 and r4.
+	 * Load & add the xtime stamp.
 	 */
-#ifdef CONFIG_PPC64
-	lwz	r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9)
-	lwz	r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9)
-#else
-	lwz	r5,STAMP_XTIME+TSPC32_TV_SEC(r9)
-	lwz	r6,STAMP_XTIME+TSPC32_TV_NSEC(r9)
-#endif
-	add	r4,r4,r6
+	lwz	r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
+	lwz	r6,STAMP_SEC_FRAC(r9)
+	addc	r4,r4,r6
 	adde	r3,r3,r5
 
-	/* We now have our result in r3,r4. We create a fake dependency
-	 * on that result and re-check the counter
+	/* We create a fake dependency on the result in r3/r4
+	 * and re-check the counter
 	 */
 	or	r6,r4,r3
 	xor	r0,r6,r6
 	add	r9,r9,r0
 	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
-	cmpl	cr0,r8,r0		/* check if updated */
+	cmplw	cr0,r8,r0		/* check if updated */
 	bne-	1b
 
-	/* check for nanosecond overflow and adjust if necessary */
-	cmpw	r4,r7
-	bltlr	/* all done if no overflow */
-	subf	r4,r7,r4		/* adjust if overflow */
-	addi	r3,r3,1
+	mulhwu	r4,r4,r7		/* convert to micro or nanoseconds */
 
 	blr
 	.cfi_endproc
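The lockless read protocol that both the removed __do_get_xsec and the surviving __do_get_tspec build around CFG_TB_UPDATE_COUNT (spin while an update is pending, read the stamps, then re-read the counter and retry if it changed) can be summarised by the following C sketch.  The structure layout and function name are illustrative only, not the kernel's actual definitions.

#include <stdint.h>

/* Illustrative stand-in for the part of the VDSO datapage used here. */
struct vdso_data {
	volatile uint32_t tb_update_count;	/* odd while the kernel is updating */
	volatile uint64_t tb_orig_stamp;
	volatile uint32_t stamp_xtime_sec;
	volatile uint32_t stamp_sec_fraction;
};

/*
 * Take a consistent snapshot of the timekeeping fields: wait until no
 * update is in progress, copy the fields, then re-read the counter and
 * retry if the kernel updated the page while we were reading it.
 */
void read_snapshot(const struct vdso_data *d, uint64_t *orig,
		   uint32_t *sec, uint32_t *frac)
{
	uint32_t count;

	do {
		do {
			count = d->tb_update_count;
		} while (count & 1);		/* update pending, spin */

		*orig = d->tb_orig_stamp;
		*sec  = d->stamp_xtime_sec;
		*frac = d->stamp_sec_fraction;
	} while (count != d->tb_update_count);	/* changed underneath us, retry */
}

The assembly version additionally forces load ordering with a fake data dependency on the counter value (the xor/add sequence in the code above), where this sketch simply relies on volatile accesses.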