[ARM] 3978/1: macro to provide a 63-bit value from a 32-bit hardware counter

This is done in a completely lockless fashion. Bits 0 to 31 of the count are provided by the hardware while bits 32 to 62 are stored in memory. The top bit in memory is used to synchronize with the hardware count half-period. When the top bits of the two counters (hardware and in memory) differ, the memory is updated with a new value, incrementing it when the hardware counter wraps around. Because a word store in memory is atomic, the incremented value will always be in sync with the top bit, which indicates to any potential concurrent reader whether the value in memory is up to date with respect to the needed increment. And any race in updating the value in memory is harmless, as the same value would simply be stored more than once. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
author: Nicolas Pitre <nico@cam.org> 2006-12-04 14:19:31 -0500
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2006-12-07 11:06:45 -0500
commit: 838ccbc35eae5b44d47724e5f694dbec4a26d269 (patch)
tree: 374cd5d33b42cd5464bd45d62193307bc5f994ce /include
parent: fa4adc614922c24601320e55bc5a1f837abad6e9 (diff)
1 files changed, 78 insertions, 0 deletions
diff --git a/include/asm-arm/cnt32_to_63.h b/include/asm-arm/cnt32_to_63.h
new file mode 100644
index 000000000000..480c873fa746
--- /dev/null
+++ b/include/asm-arm/cnt32_to_63.h
@@ -0,0 +1,78 @@
+/*
+ *  include/asm/cnt32_to_63.h -- extend a 32-bit counter to 63 bits
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    December 3, 2006
+ *  Copyright:  MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+#ifndef __INCLUDE_CNT32_TO_63_H__
+#define __INCLUDE_CNT32_TO_63_H__
+#include <linux/compiler.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+/*
+ * Prototype: u64 cnt32_to_63(u32 cnt)
+ * Many hardware clock counters are only 32 bits wide and therefore have
+ * a relatively short period making wrap-arounds rather frequent.  This
+ * is a problem when implementing sched_clock() for example, where a 64-bit
+ * non-wrapping monotonic value is expected to be returned.
+ *
+ * To overcome that limitation, let's extend a 32-bit counter to 63 bits
+ * in a completely lock free fashion. Bits 0 to 31 of the clock are provided
+ * by the hardware while bits 32 to 62 are stored in memory.  The top bit in
+ * memory is used to synchronize with the hardware clock half-period.  When
+ * the top bit of both counters (hardware and in memory) differ then the
+ * memory is updated with a new value, incrementing it when the hardware
+ * counter wraps around.
+ *
+ * Because a word store in memory is atomic, the incremented value will
+ * always be in sync with the top bit, which indicates to any potential
+ * concurrent reader whether the value in memory is up to date with regard
+ * to the needed increment.  Any race in updating the value in memory is
+ * harmless, as the same value would simply be stored more than once.
+ *
+ * The only restriction for the algorithm to work properly is that this
+ * code must be executed at least once per half period of the 32-bit
+ * counter to properly update the state bit in memory. This is usually not a
+ * problem in practice, but if it is, a kernel timer could be scheduled
+ * to ensure that this code is executed often enough.
+ *
+ * Note that the top bit (bit 63) in the returned value should be considered
+ * as garbage.  It is not cleared here because callers are likely to use a
+ * multiplier on the returned value which can get rid of the top bit
+ * implicitly by making the multiplier even, therefore saving on a runtime
+ * clear-bit instruction. Otherwise caller must remember to clear the top
+ * bit explicitly.
+ */
+/* this is used only to give gcc a clue about good code generation */
+typedef union {
+        struct {        /* anonymous struct: two 32-bit halves sharing storage with val */
+#if defined(__LITTLE_ENDIAN)
+                u32 lo, hi;     /* little endian: lo is declared first */
+#elif defined(__BIG_ENDIAN)
+                u32 hi, lo;     /* big endian: hi is declared first */
+#endif  /* NOTE(review): if neither macro is defined the struct has no members */
+        };
+        u64 val;        /* the same storage viewed as one 64-bit value */
+} cnt32_to_63_t;
+#define cnt32_to_63(cnt_lo) \
+({ /* GNU statement expression: its value is the last expression, __x.val */ \
+        static volatile u32 __m_cnt_hi = 0; /* in-memory word: bit 31 is the sync flag, bits 0..30 become result bits 32..62 */ \
+        cnt32_to_63_t __x; \
+        __x.hi = __m_cnt_hi; /* read the memory word first, then sample the counter */ \
+        __x.lo = (cnt_lo); /* cnt_lo is evaluated exactly once */ \
+        if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) /* true when bit 31 of hi and lo differ */ \
+                __m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); /* toggle bit 31; add 1 only if bit 31 was set */ \
+        __x.val; /* bit 63 is not cleared here; see the note in the header comment */ \
+})
+#endif
author	Nicolas Pitre <nico@cam.org>	2006-12-04 14:19:31 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2006-12-07 11:06:45 -0500
commit	838ccbc35eae5b44d47724e5f694dbec4a26d269 (patch)
tree	374cd5d33b42cd5464bd45d62193307bc5f994ce /include
parent	fa4adc614922c24601320e55bc5a1f837abad6e9 (diff)

diff --git a/include/asm-arm/cnt32_to_63.h b/include/asm-arm/cnt32_to_63.h
new file mode 100644
index 000000000000..480c873fa746
--- /dev/null
+++ b/include/asm-arm/cnt32_to_63.h
@@ -0,0 +1,78 @@
	1	/*
	2	* include/asm/cnt32_to_63.h -- extend a 32-bit counter to 63 bits
	3	*
	4	* Author: Nicolas Pitre
	5	* Created: December 3, 2006
	6	* Copyright: MontaVista Software, Inc.
	7	*
	8	* This program is free software; you can redistribute it and/or modify
	9	* it under the terms of the GNU General Public License version 2
	10	* as published by the Free Software Foundation.
	11	*/
	12
	13	#ifndef __INCLUDE_CNT32_TO_63_H__
	14	#define __INCLUDE_CNT32_TO_63_H__
	15
	16	#include <linux/compiler.h>
	17	#include <asm/types.h>
	18	#include <asm/byteorder.h>
	19
	20	/*
	21	* Prototype: u64 cnt32_to_63(u32 cnt)
	22	* Many hardware clock counters are only 32 bits wide and therefore have
	23	* a relatively short period making wrap-arounds rather frequent. This
	24	* is a problem when implementing sched_clock() for example, where a 64-bit
	25	* non-wrapping monotonic value is expected to be returned.
	26	*
	27	* To overcome that limitation, let's extend a 32-bit counter to 63 bits
	28	* in a completely lock free fashion. Bits 0 to 31 of the clock are provided
	29	* by the hardware while bits 32 to 62 are stored in memory. The top bit in
	30	* memory is used to synchronize with the hardware clock half-period. When
	31	* the top bit of both counters (hardware and in memory) differ then the
	32	* memory is updated with a new value, incrementing it when the hardware
	33	* counter wraps around.
	34	*
	35	* Because a word store in memory is atomic, the incremented value will
	36	* always be in sync with the top bit, which indicates to any potential
	37	* concurrent reader whether the value in memory is up to date with regard
	38	* to the needed increment. Any race in updating the value in memory is
	39	* harmless, as the same value would simply be stored more than once.
	40	*
	41	* The only restriction for the algorithm to work properly is that this
	42	* code must be executed at least once per half period of the 32-bit
	43	* counter to properly update the state bit in memory. This is usually not a
	44	* problem in practice, but if it is, a kernel timer could be scheduled
	45	* to ensure that this code is executed often enough.
	46	*
	47	* Note that the top bit (bit 63) in the returned value should be considered
	48	* as garbage. It is not cleared here because callers are likely to use a
	49	* multiplier on the returned value which can get rid of the top bit
	50	* implicitly by making the multiplier even, therefore saving on a runtime
	51	* clear-bit instruction. Otherwise caller must remember to clear the top
	52	* bit explicitly.
	53	*/
	54
	55	/* this is used only to give gcc a clue about good code generation */
	56	typedef union {
	57	struct {	/* anonymous struct: two 32-bit halves sharing storage with val */
	58	#if defined(__LITTLE_ENDIAN)
	59	u32 lo, hi;	/* little endian: lo is declared first */
	60	#elif defined(__BIG_ENDIAN)
	61	u32 hi, lo;	/* big endian: hi is declared first */
	62	#endif	/* NOTE(review): if neither macro is defined the struct has no members */
	63	};
	64	u64 val;	/* the same storage viewed as one 64-bit value */
	65	} cnt32_to_63_t;
	66
	67	#define cnt32_to_63(cnt_lo) \
	68	({ /* GNU statement expression: its value is the last expression, __x.val */ \
	69	static volatile u32 __m_cnt_hi = 0; /* in-memory word: bit 31 is the sync flag, bits 0..30 become result bits 32..62 */ \
	70	cnt32_to_63_t __x; \
	71	__x.hi = __m_cnt_hi; /* read the memory word first, then sample the counter */ \
	72	__x.lo = (cnt_lo); /* cnt_lo is evaluated exactly once */ \
	73	if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) /* true when bit 31 of hi and lo differ */ \
	74	__m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); /* toggle bit 31; add 1 only if bit 31 was set */ \
	75	__x.val; /* bit 63 is not cleared here; see the note in the header comment */ \
	76	})
	77
	78	#endif