aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/tsc_sync.c
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2012-02-06 21:32:20 -0500
committerIngo Molnar <mingo@elte.hu>2012-02-22 05:49:40 -0500
commitb0e5c77903fd717cc5eb02b7b8f5de3c869efc49 (patch)
tree1c76da4dbb0ff5c2171dfa3c2e6834070014eb3c /arch/x86/kernel/tsc_sync.c
parent97ac984d2feec885d10f900591431088eab42021 (diff)
x86/tsc: Reduce the TSC sync check time for core-siblings
For each logical CPU that is coming online, we spend 20msec for checking the TSC synchronization. And as this is done sequentially for each logical CPU boot, this time gets added up depending on the number of logical CPU's supported by the platform. Minimize this by using the socket topology information. If the target CPU coming online doesn't have any of its core-siblings online, a timeout of 20msec will be used for the TSC-warp measurement loop. Otherwise a smaller timeout of 2msec will be used, as we have some information about this socket already (and this information grows as we have more and more logical-siblings in that socket). Ideally we should be able to skip the TSC sync check on the other core-siblings, if the first logical CPU in a socket passed the sync test. But as the TSC is per-logical CPU and can potentially be modified wrongly by the bios before the OS boot, TSC sync test for smaller duration should be able to catch such errors. Also this will catch the condition where all the cores in the socket doesn't get reset at the same time. For example, with this modification, time spent in TSC sync checks on a 4 socket 10-core with HT system gets reduced from 1580msec to 212msec. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Acked-by: Arjan van de Ven <arjan@linux.intel.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Jack Steiner <steiner@sgi.com> Cc: venki@google.com Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/1328581940.29790.20.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/tsc_sync.c')
-rw-r--r--arch/x86/kernel/tsc_sync.c29
1 files changed, 24 insertions, 5 deletions
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b46cb..fc25e60a588 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
42/* 42/*
43 * TSC-warp measurement loop running on both CPUs: 43 * TSC-warp measurement loop running on both CPUs:
44 */ 44 */
45static __cpuinit void check_tsc_warp(void) 45static __cpuinit void check_tsc_warp(unsigned int timeout)
46{ 46{
47 cycles_t start, now, prev, end; 47 cycles_t start, now, prev, end;
48 int i; 48 int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
51 start = get_cycles(); 51 start = get_cycles();
52 rdtsc_barrier(); 52 rdtsc_barrier();
53 /* 53 /*
54 * The measurement runs for 20 msecs: 54 * The measurement runs for 'timeout' msecs:
55 */ 55 */
56 end = start + tsc_khz * 20ULL; 56 end = start + (cycles_t) tsc_khz * timeout;
57 now = start; 57 now = start;
58 58
59 for (i = 0; ; i++) { 59 for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
99} 99}
100 100
101/* 101/*
102 * If the target CPU coming online doesn't have any of its core-siblings
103 * online, a timeout of 20msec will be used for the TSC-warp measurement
104 * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
105 * information about this socket already (and this information grows as we
106 * have more and more logical-siblings in that socket).
107 *
108 * Ideally we should be able to skip the TSC sync check on the other
109 * core-siblings, if the first logical CPU in a socket passed the sync test.
110 * But as the TSC is per-logical CPU and can potentially be modified wrongly
111 * by the bios, TSC sync test for smaller duration should be able
112 * to catch such errors. Also this will catch the condition where all the
113 * cores in the socket doesn't get reset at the same time.
114 */
115static inline unsigned int loop_timeout(int cpu)
116{
117 return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
118}
119
120/*
102 * Source CPU calls into this - it waits for the freshly booted 121 * Source CPU calls into this - it waits for the freshly booted
103 * target CPU to arrive and then starts the measurement: 122 * target CPU to arrive and then starts the measurement:
104 */ 123 */
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
135 */ 154 */
136 atomic_inc(&start_count); 155 atomic_inc(&start_count);
137 156
138 check_tsc_warp(); 157 check_tsc_warp(loop_timeout(cpu));
139 158
140 while (atomic_read(&stop_count) != cpus-1) 159 while (atomic_read(&stop_count) != cpus-1)
141 cpu_relax(); 160 cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
183 while (atomic_read(&start_count) != cpus) 202 while (atomic_read(&start_count) != cpus)
184 cpu_relax(); 203 cpu_relax();
185 204
186 check_tsc_warp(); 205 check_tsc_warp(loop_timeout(smp_processor_id()));
187 206
188 /* 207 /*
189 * Ok, we are done: 208 * Ok, we are done: