aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/scripts/python/sched-migration.py
blob: de66cb3b72c9e6be9dc5d884611e0522def92631 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
#!/usr/bin/python
#
# Cpu task migration overview toy
#
# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
#
# perf script event handlers have been generated by perf script -g python
#
# This software is distributed under the terms of the GNU General
# Public License ("GPL") version 2 as published by the Free Software
# Foundation.


import os
import sys

from collections import defaultdict
from UserList import UserList

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
sys.path.append('scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *
from Core import *
from SchedGui import *


# pid -> command name.  pid 0 is pre-registered as "idle"; other entries
# are filled in while events are processed.
threads = { 0 : "idle"}

def thread_name(pid):
	""" Return a "comm:pid" label for the given pid.

	    pids whose command name was never recorded (for instance tasks
	    only seen in migrate events, which carry no name here) are
	    labelled "unknown" instead of raising KeyError. """
	return "%s:%d" % (threads.get(pid, "unknown"), pid)

class RunqueueEventUnknown:
	""" Placeholder event for a runqueue state whose cause is not known. """

	def __repr__(self):
		label = "unknown"
		return label

	@staticmethod
	def color():
		""" No colour is associated with an unknown event. """
		return None

class RunqueueEventSleep:
	""" Event: a task left the runqueue because it went to sleep. """

	def __init__(self, sleeper):
		# pid of the task that went to sleep
		self.sleeper = sleeper

	@staticmethod
	def color():
		""" Colour triple for this event kind
		    (presumably RGB -- confirm against SchedGui). """
		return (0x00, 0x00, 0xff)

	def __repr__(self):
		who = thread_name(self.sleeper)
		return "%s gone to sleep" % who

class RunqueueEventWakeup:
	""" Event: a task was woken up onto the runqueue. """

	def __init__(self, wakee):
		# pid of the task that was woken up
		self.wakee = wakee

	@staticmethod
	def color():
		""" Colour triple for this event kind
		    (presumably RGB -- confirm against SchedGui). """
		return (0xff, 0xff, 0x00)

	def __repr__(self):
		who = thread_name(self.wakee)
		return "%s woke up" % who

class RunqueueEventFork:
	""" Event: a newly forked task appeared on the runqueue. """

	def __init__(self, child):
		# pid of the new task
		self.child = child

	@staticmethod
	def color():
		""" Colour triple for this event kind
		    (presumably RGB -- confirm against SchedGui). """
		return (0x00, 0xff, 0x00)

	def __repr__(self):
		who = thread_name(self.child)
		return "new forked task %s" % who

class RunqueueMigrateIn:
	""" Event: a task was migrated onto this runqueue. """

	def __init__(self, new):
		# pid of the task that arrived
		self.new = new

	@staticmethod
	def color():
		""" Colour triple for this event kind
		    (presumably RGB -- confirm against SchedGui). """
		return (0x00, 0xf0, 0xff)

	def __repr__(self):
		who = thread_name(self.new)
		return "task migrated in %s" % who

class RunqueueMigrateOut:
	""" Event: a task was migrated away from this runqueue. """

	def __init__(self, old):
		# pid of the task that left
		self.old = old

	@staticmethod
	def color():
		""" Colour triple for this event kind
		    (presumably RGB -- confirm against SchedGui). """
		return (0xff, 0x00, 0xff)

	def __repr__(self):
		who = thread_name(self.old)
		return "task migrated out %s" % who

class RunqueueSnapshot:
	""" State of one cpu runqueue: the pids queued on it (a tuple) and
	    the event that produced this state.

	    The transition methods return self when nothing changed and a new
	    snapshot otherwise; callers compare identities to detect a change. """

	def __init__(self, tasks = (0,), event = RunqueueEventUnknown()):
		# tasks is always copied into a tuple, so snapshots never share a
		# mutable task list.  By default only pid 0 is on the runqueue.
		self.tasks = tuple(tasks)
		self.event = event

	def sched_switch(self, prev, prev_state, next):
		""" Return the snapshot that follows a context switch from pid
		    prev (left in state prev_state) to pid next. """
		prev_runnable = taskState(prev_state) == "R"

		# Both tasks already queued and prev still runnable: no change.
		if prev_runnable and next in self.tasks and prev in self.tasks:
			return self

		if prev_runnable:
			event = RunqueueEventUnknown()
		else:
			event = RunqueueEventSleep(prev)

		next_tasks = list(self.tasks)
		if prev in self.tasks:
			# A task that is no longer runnable leaves the runqueue.
			if not prev_runnable:
				next_tasks.remove(prev)
		elif prev_runnable:
			# Runnable task we had not seen on this runqueue yet.
			next_tasks.append(prev)

		if next not in next_tasks:
			next_tasks.append(next)

		return RunqueueSnapshot(next_tasks, event)

	def migrate_out(self, old):
		""" Return the snapshot without pid old, or self if old was not
		    queued here. """
		if old not in self.tasks:
			return self
		next_tasks = [task for task in self.tasks if task != old]

		return RunqueueSnapshot(next_tasks, RunqueueMigrateOut(old))

	def __migrate_in(self, new, event):
		""" Return the snapshot with pid new added, tagged with event. """
		if new in self.tasks:
			# NOTE(review): this updates the event of the existing
			# snapshot in place and reports "no change" by returning
			# self; the snapshot may be referenced from several time
			# slices.
			self.event = event
			return self
		next_tasks = self.tasks + (new,)

		return RunqueueSnapshot(next_tasks, event)

	def migrate_in(self, new):
		""" pid new was migrated onto this runqueue. """
		return self.__migrate_in(new, RunqueueMigrateIn(new))

	def wake_up(self, new):
		""" pid new was woken up on this runqueue. """
		return self.__migrate_in(new, RunqueueEventWakeup(new))

	def wake_up_new(self, new):
		""" Freshly forked pid new was woken up on this runqueue. """
		return self.__migrate_in(new, RunqueueEventFork(new))

	def load(self):
		""" Provide the number of tasks on the runqueue.
		    Don't count idle"""
		return len(self.tasks) - 1

	def __repr__(self):
		# This used to call self.origin_tostring(), which is not defined
		# anywhere in the class, so repr() raised AttributeError.  Show
		# the task tuple followed by the event that produced the state.
		return "%s %s" % (repr(self.tasks), repr(self.event))

class TimeSlice:
	""" Runqueue state of every cpu between two consecutive events.

	    A slice starts as a copy of its predecessor's per-cpu snapshots;
	    the event methods then replace the snapshots of the cpus the
	    event touched and append the slice to the list they are given. """

	def __init__(self, start, prev):
		self.start = start
		self.end = start
		self.prev = prev
		# cpus that triggered the event
		self.event_cpus = []
		if prev is None:
			# First slice of a chain: every cpu starts from the
			# default snapshot and nothing is loaded yet.
			self.total_load = 0
			self.rqs = defaultdict(RunqueueSnapshot)
		else:
			self.total_load = prev.total_load
			self.rqs = prev.rqs.copy()

	def __update_total_load(self, old_rq, new_rq):
		""" Adjust the summed load by the difference between two snapshots. """
		self.total_load += new_rq.load() - old_rq.load()

	def sched_switch(self, ts_list, prev, prev_state, next, cpu):
		""" Apply a context switch on cpu; record the slice if it changed
		    the runqueue. """
		before = self.prev.rqs[cpu]
		after = before.sched_switch(prev, prev_state, next)

		if after is not before:
			self.rqs[cpu] = after
			self.__update_total_load(before, after)
			ts_list.append(self)
			self.event_cpus = [cpu]

	def migrate(self, ts_list, new, old_cpu, new_cpu):
		""" Move pid new from old_cpu to new_cpu and record the slice. """
		if old_cpu == new_cpu:
			return

		# Source cpu: drop the task.
		src_before = self.prev.rqs[old_cpu]
		src_after = src_before.migrate_out(new)
		self.rqs[old_cpu] = src_after
		self.__update_total_load(src_before, src_after)

		# Destination cpu: add the task.
		dst_before = self.prev.rqs[new_cpu]
		dst_after = dst_before.migrate_in(new)
		self.rqs[new_cpu] = dst_after
		self.__update_total_load(dst_before, dst_after)

		ts_list.append(self)

		# The source cpu only counts as touched if the task really was on it.
		if src_after is not src_before:
			self.event_cpus.append(old_cpu)
		self.event_cpus.append(new_cpu)

	def wake_up(self, ts_list, pid, cpu, fork):
		""" Wake pid up on cpu (as a new task when fork is true); record
		    the slice if the runqueue changed. """
		before = self.prev.rqs[cpu]
		waker = before.wake_up_new if fork else before.wake_up
		after = waker(pid)

		if after is not before:
			self.rqs[cpu] = after
			self.__update_total_load(before, after)
			ts_list.append(self)
			self.event_cpus = [cpu]

	def next(self, t):
		""" Close this slice at time t and return its successor. """
		self.end = t
		successor = TimeSlice(t, self)
		return successor

class TimeSliceList(UserList):
	""" Chronological list of TimeSlice objects plus the glue used by
	    the GUI window (set with set_root_win) to query and paint them. """

	def __init__(self, arg = None):
		# A shared "[]" default would be aliased by every list created
		# without an argument, so build a fresh list per instance.  A
		# list passed in explicitly is still used as-is, not copied.
		self.data = [] if arg is None else arg

	def get_time_slice(self, ts):
		""" Return a new slice starting at time ts, chained to the last
		    recorded slice.  The returned slice is not appended here. """
		if not self.data:
			# Nothing recorded yet: chain to an empty root slice.
			return TimeSlice(ts, TimeSlice(-1, None))
		return self.data[-1].next(ts)

	def find_time_slice(self, ts):
		""" Binary search for a slice with start <= ts <= end.

		    Returns the index of the first such slice, or -1 if there is
		    none (including when the list is empty). """
		if not self.data:
			return -1

		start = 0
		end = len(self.data)
		found = -1
		searching = True
		while searching:
			# Last probe once the window has shrunk to one element.
			if start == end or start == end - 1:
				searching = False

			# Floor division keeps the index an int under any
			# division semantics.
			i = (end + start) // 2
			if self.data[i].start <= ts and self.data[i].end >= ts:
				# Keep looking left for an earlier match.
				found = i
				end = i
				continue

			if self.data[i].end < ts:
				start = i

			elif self.data[i].start > ts:
				end = i

		return found

	def set_root_win(self, win):
		""" Remember the window that receives summaries and paint requests. """
		self.root_win = win

	def mouse_down(self, cpu, t):
		""" Send a text summary of cpu's runqueue at time t to the root
		    window; does nothing if no slice covers t. """
		idx = self.find_time_slice(t)
		if idx == -1:
			return

		ts = self[idx]
		rq = ts.rqs[cpu]
		raw = "CPU: %d\n" % cpu
		raw += "Last event : %s\n" % rq.event.__repr__()
		# Timestamps are in nanoseconds: whole seconds, then microseconds.
		raw += "Timestamp : %d.%06d\n" % (ts.start // (10 ** 9), (ts.start % (10 ** 9)) // 1000)
		# ns -> us is a division by 10 ** 3; the previous 10 ** 6 divisor
		# printed milliseconds under the "us" label.
		raw += "Duration : %6d us\n" % ((ts.end - ts.start) // 1000)
		raw += "Load = %d\n" % rq.load()
		for task in rq.tasks:
			raw += "%s \n" % thread_name(task)

		self.root_win.update_summary(raw)

	def update_rectangle_cpu(self, slice, cpu):
		""" Ask the root window to paint cpu's rectangle for one slice. """
		rq = slice.rqs[cpu]

		# Share of the total load carried by this cpu (0 when idle overall).
		if slice.total_load != 0:
			load_rate = rq.load() / float(slice.total_load)
		else:
			load_rate = 0

		# The higher the share, the lower the last two colour components.
		red_power = int(0xff - (0xff * load_rate))
		color = (0xff, red_power, red_power)

		# Only cpus touched by the slice's event get an event colour.
		top_color = None
		if cpu in slice.event_cpus:
			top_color = rq.event.color()

		self.root_win.paint_rectangle_zone(cpu, color, top_color, slice.start, slice.end)

	def fill_zone(self, start, end):
		""" Paint every cpu of every slice from the one covering start up
		    to the last slice that begins no later than end. """
		first = self.find_time_slice(start)
		if first == -1:
			return

		for idx in range(first, len(self.data)):
			timeslice = self.data[idx]
			if timeslice.start > end:
				return

			for cpu in timeslice.rqs:
				self.update_rectangle_cpu(timeslice, cpu)

	def interval(self):
		""" Return (start of first slice, end of last slice), or (0, 0)
		    when nothing was recorded. """
		if not self.data:
			return (0, 0)

		return (self.data[0].start, self.data[-1].end)

	def nr_rectangles(self):
		""" Return the highest cpu number known to the last slice, or 0
		    when nothing was recorded (consistent with interval()). """
		if not self.data:
			return 0

		return max([0] + list(self.data[-1].rqs))


class SchedEventProxy:
	""" Turns the scheduler events delivered by the handlers below into
	    updates of a TimeSliceList. """

	def __init__(self):
		# cpu -> pid last switched in on that cpu; -1 until one is seen
		self.current_tsk = defaultdict(lambda : -1)
		self.timeslices = TimeSliceList()

	def sched_switch(self, headers, prev_comm, prev_pid, prev_prio, prev_state,
			 next_comm, next_pid, next_prio):
		""" Ensure the task we sched out this cpu is really the one
		    we logged. Otherwise we may have missed traces """

		on_cpu_task = self.current_tsk[headers.cpu]

		if on_cpu_task != -1 and on_cpu_task != prev_pid:
			# NOTE(review): despite the wording the event is still
			# processed below; only this diagnostic is emitted.
			# Single-argument call form prints the same text whether
			# print is a statement or a function.
			print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" %
				(headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid))

		# Remember the command names so thread_name() can label both pids.
		threads[prev_pid] = prev_comm
		threads[next_pid] = next_comm
		self.current_tsk[headers.cpu] = next_pid

		ts = self.timeslices.get_time_slice(headers.ts())
		ts.sched_switch(self.timeslices, prev_pid, prev_state, next_pid, headers.cpu)

	def migrate(self, headers, pid, prio, orig_cpu, dest_cpu):
		""" Record the migration of pid from orig_cpu to dest_cpu. """
		ts = self.timeslices.get_time_slice(headers.ts())
		ts.migrate(self.timeslices, pid, orig_cpu, dest_cpu)

	def wake_up(self, headers, comm, pid, success, target_cpu, fork):
		""" Record the wakeup of pid on target_cpu; fork is true for a
		    newly created task.  Events with success == 0 are ignored. """
		# The event carries the task's command name: keep it, otherwise a
		# task that is woken but never switched in has no name to show.
		threads[pid] = comm
		if success == 0:
			return
		ts = self.timeslices.get_time_slice(headers.ts())
		ts.wake_up(self.timeslices, pid, target_cpu, fork)


def trace_begin():
	""" perf script hook (presumably run once before any event is
	    delivered): create the global event parser. """
	global parser
	parser = SchedEventProxy()

def trace_end():
	""" perf script hook (presumably run after the last event): open the
	    "Migration" window on the collected time slices and run the GUI
	    main loop. """
	app = wx.App(False)
	# Hold a reference to the frame while the main loop runs.
	frame = RootFrame(parser.timeslices, "Migration")
	app.MainLoop()

def sched__sched_stat_runtime(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, runtime, vruntime):
	""" Generated handler for sched_stat_runtime; this script ignores the event. """

def sched__sched_stat_iowait(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, delay):
	""" Generated handler for sched_stat_iowait; this script ignores the event. """

def sched__sched_stat_sleep(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, delay):
	""" Generated handler for sched_stat_sleep; this script ignores the event. """

def sched__sched_stat_wait(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, delay):
	""" Generated handler for sched_stat_wait; this script ignores the event. """

def sched__sched_process_fork(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, parent_comm, parent_pid, child_comm, child_pid):
	""" Generated handler for sched_process_fork; this script ignores the event. """

def sched__sched_process_wait(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio):
	""" Generated handler for sched_process_wait; this script ignores the event. """

def sched__sched_process_exit(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio):
	""" Generated handler for sched_process_exit; this script ignores the event. """

def sched__sched_process_free(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio):
	""" Generated handler for sched_process_free; this script ignores the event. """

def sched__sched_migrate_task(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio, orig_cpu,
	dest_cpu):
	""" Forward a task migration event to the global parser. """
	# Bundle the common_* fields before handing the event over.
	hdr = EventHeaders(common_cpu, common_secs, common_nsecs,
			common_pid, common_comm, common_callchain)
	parser.migrate(hdr, pid, prio, orig_cpu, dest_cpu)

def sched__sched_switch(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm, common_callchain,
	prev_comm, prev_pid, prev_prio, prev_state,
	next_comm, next_pid, next_prio):
	""" Forward a context switch event to the global parser. """
	# Bundle the common_* fields before handing the event over.
	hdr = EventHeaders(common_cpu, common_secs, common_nsecs,
			common_pid, common_comm, common_callchain)
	parser.sched_switch(hdr,
			prev_comm, prev_pid, prev_prio, prev_state,
			next_comm, next_pid, next_prio)

def sched__sched_wakeup_new(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio, success,
	target_cpu):
	""" Forward the wakeup of a new task to the global parser. """
	hdr = EventHeaders(common_cpu, common_secs, common_nsecs,
			common_pid, common_comm, common_callchain)
	# Trailing 1 is the parser's fork flag: this is a new task.
	parser.wake_up(hdr, comm, pid, success, target_cpu, 1)

def sched__sched_wakeup(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio, success,
	target_cpu):
	""" Forward the wakeup of an existing task to the global parser. """
	hdr = EventHeaders(common_cpu, common_secs, common_nsecs,
			common_pid, common_comm, common_callchain)
	# Trailing 0 is the parser's fork flag: not a new task.
	parser.wake_up(hdr, comm, pid, success, target_cpu, 0)

def sched__sched_wait_task(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid, prio):
	""" Generated handler for sched_wait_task; this script ignores the event. """

def sched__sched_kthread_stop_ret(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, ret):
	""" Generated handler for sched_kthread_stop_ret; this script ignores the event. """

def sched__sched_kthread_stop(event_name, context, common_cpu,
	common_secs, common_nsecs, common_pid, common_comm,
	common_callchain, comm, pid):
	""" Generated handler for sched_kthread_stop; this script ignores the event. """

def trace_unhandled(event_name, context, event_fields_dict):
	""" Catch-all for events without a dedicated handler; they are ignored. """
s="hl com"> * Must be called with interrupts disabled. * Assumes that account_system_vtime() has been called recently * (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; get_paca()->user_time_scaled = 0; get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() #endif void __delay(unsigned long loops) { unsigned long start; int diff; if (__USE_RTC()) { start = get_rtcl(); do { /* the RTCL register wraps at 1000000000 */ diff = get_rtcl() - start; if (diff < 0) diff += 1000000000; } while (diff < loops); } else { start = get_tbl(); while (get_tbl() - start < loops) HMT_low(); HMT_medium(); } } EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { __delay(tb_ticks_per_usec * usecs); } EXPORT_SYMBOL(udelay); #ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); if (in_lock_functions(pc)) return regs->link; return pc; } EXPORT_SYMBOL(profile_pc); #endif #ifdef CONFIG_PPC_ISERIES /* * This function recalibrates the timebase based on the 49-bit time-of-day * value in the Titan chip. The Titan is much more accurate than the value * returned by the service processor for the timebase frequency. 
*/ static int __init iSeries_tb_recal(void) { unsigned long titan, tb; /* Make sure we only run on iSeries */ if (!firmware_has_feature(FW_FEATURE_ISERIES)) return -ENODEV; tb = get_tb(); titan = HvCallXm_loadTod(); if ( iSeries_recal_titan ) { unsigned long tb_ticks = tb - iSeries_recal_tb; unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12; unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec; unsigned long new_tb_ticks_per_jiffy = DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ); long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy; char sign = '+'; /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */ new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ; if ( tick_diff < 0 ) { tick_diff = -tick_diff; sign = '-'; } if ( tick_diff ) { if ( tick_diff < tb_ticks_per_jiffy/25 ) { printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n", new_tb_ticks_per_jiffy, sign, tick_diff ); tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; tb_ticks_per_sec = new_tb_ticks_per_sec; calc_cputime_factors(); vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; setup_cputime_one_jiffy(); } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" " new tb_ticks_per_jiffy = %lu\n" " old tb_ticks_per_jiffy = %lu\n", new_tb_ticks_per_jiffy, tb_ticks_per_jiffy ); } } } iSeries_recal_titan = titan; iSeries_recal_tb = tb; /* Called here as now we know accurate values for the timebase */ clocksource_init(); return 0; } late_initcall(iSeries_tb_recal); /* Called from platform early init */ void __init iSeries_time_init_early(void) { iSeries_recal_tb = get_tb(); iSeries_recal_titan = HvCallXm_loadTod(); } #endif /* CONFIG_PPC_ISERIES */ #ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 
*/ #ifdef CONFIG_PPC64 static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), "i" (offsetof(struct paca_struct, irq_work_pending))); } static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); #define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 #define test_irq_work_pending() __get_cpu_var(irq_work_pending) #define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ void set_irq_work_pending(void) { preempt_disable(); set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() #endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor * set the hardware decrementer. We set a virtual decrementer * in the lppaca and call the hypervisor if the virtual * decrementer is less than the current value in the hardware * decrementer. (almost always the new decrementer value will * be greater than the current hardware decementer so the hypervisor * call will not be needed) */ /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. 
*/ void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); struct clock_event_device *evt = &decrementer->event; u64 now; trace_timer_interrupt_entry(regs); __get_cpu_var(irq_stat).timer_irqs++; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif old_regs = set_irq_regs(regs); irq_enter(); if (test_irq_work_pending()) { clear_irq_work_pending(); irq_work_run(); } #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; #endif now = get_tb_or_rtc(); if (now >= decrementer->next_tb) { decrementer->next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); } else { now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); } #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) process_hvlpevents(); #endif #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif irq_exit(); set_irq_regs(old_regs); trace_timer_interrupt_exit(regs); } #ifdef CONFIG_SUSPEND static void generic_suspend_disable_irqs(void) { /* Disable the decrementer, so that it doesn't interfere * with suspending. 
*/ set_dec(0x7fffffff); local_irq_disable(); set_dec(0x7fffffff); } static void generic_suspend_enable_irqs(void) { local_irq_enable(); } /* Overrides the weak version in kernel/power/main.c */ void arch_suspend_disable_irqs(void) { if (ppc_md.suspend_disable_irqs) ppc_md.suspend_disable_irqs(); generic_suspend_disable_irqs(); } /* Overrides the weak version in kernel/power/main.c */ void arch_suspend_enable_irqs(void) { generic_suspend_enable_irqs(); if (ppc_md.suspend_enable_irqs) ppc_md.suspend_enable_irqs(); } #endif /* * Scheduler clock - returns current time in nanosec units. * * Note: mulhdu(a, b) (multiply high double unsigned) returns * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b * are 64-bit unsigned numbers. */ unsigned long long sched_clock(void) { if (__USE_RTC()) return get_rtc(); return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; const unsigned int *fp; int found = 0; /* The cpu node should have timebase and clock frequency properties */ cpu = of_find_node_by_type(NULL, "cpu"); if (cpu) { fp = of_get_property(cpu, name, NULL); if (fp) { found = 1; *val = of_read_ulong(fp, cells); } of_node_put(cpu); } return found; } /* should become __cpuinit when secondary_cpu_time_init also is */ void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) /* Clear any pending timer interrupts */ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); /* Enable decrementer interrupt */ mtspr(SPRN_TCR, TCR_DIE); #endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ } void __init generic_calibrate_decr(void) { ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) && !get_freq("timebase-frequency", 1, &ppc_tb_freq)) { printk(KERN_ERR "WARNING: Estimating decrementer frequency " "(not found)\n"); } ppc_proc_freq = DEFAULT_PROC_FREQ; /* 
hardcoded default */ if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) && !get_freq("clock-frequency", 1, &ppc_proc_freq)) { printk(KERN_ERR "WARNING: Estimating processor frequency " "(not found)\n"); } } int update_persistent_clock(struct timespec now) { struct rtc_time tm; if (!ppc_md.set_rtc_time) return 0; to_tm(now.tv_sec + 1 + timezone_offset, &tm); tm.tm_year -= 1900; tm.tm_mon -= 1; return ppc_md.set_rtc_time(&tm); } static void __read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; ts->tv_nsec = 0; /* XXX this is a litle fragile but will work okay in the short term */ if (first) { first = 0; if (ppc_md.time_init) timezone_offset = ppc_md.time_init(); /* get_boot_time() isn't guaranteed to be safe to call late */ if (ppc_md.get_boot_time) { ts->tv_sec = ppc_md.get_boot_time() - timezone_offset; return; } } if (!ppc_md.get_rtc_time) { ts->tv_sec = 0; return; } ppc_md.get_rtc_time(&tm); ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); } void read_persistent_clock(struct timespec *ts) { __read_persistent_clock(ts); /* Sanitize it in case real time clock is set below EPOCH */ if (ts->tv_sec < 0) { ts->tv_sec = 0; ts->tv_nsec = 0; } } /* clocksource code */ static cycle_t rtc_read(struct clocksource *cs) { return (cycle_t)get_rtc(); } static cycle_t timebase_read(struct clocksource *cs) { return (cycle_t)get_tb(); } void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, struct clocksource *clock, u32 mult) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; if (clock != &clocksource_timebase) return; /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_mb(); /* XXX this assumes clock->shift == 22 */ /* 4611686018 ~= 2^(20+64-22) / 1e9 */ new_tb_to_xs = (u64) mult * 4611686018ULL; new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(new_stamp_xsec, 1000000000); new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); /* this is tv_nsec / 1e9 as a 0.32 fraction */ frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; /* * tb_update_count is used to allow the userspace gettimeofday code * to assure itself that it sees a consistent view of the tb_to_xs and * stamp_xsec variables. It reads the tb_update_count, then reads * tb_to_xs and stamp_xsec and then reads tb_update_count again. If * the two values of tb_update_count match and are even then the * tb_to_xs and stamp_xsec values are consistent. If not, then it * loops back and reads them again until this criteria is met. * We expect the caller to have done the first increment of * vdso_data->tb_update_count already. */ vdso_data->tb_orig_stamp = clock->cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = wtm->tv_sec; vdso_data->wtom_clock_nsec = wtm->tv_nsec; vdso_data->stamp_xtime = *wall_time; vdso_data->stamp_sec_fraction = frac_sec; smp_wmb(); ++(vdso_data->tb_update_count); } void update_vsyscall_tz(void) { /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_mb(); vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; vdso_data->tz_dsttime = sys_tz.tz_dsttime; smp_mb(); ++vdso_data->tb_update_count; } static void __init clocksource_init(void) { struct clocksource *clock; if (__USE_RTC()) clock = &clocksource_rtc; else clock = &clocksource_timebase; clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); if (clocksource_register(clock)) { printk(KERN_ERR "clocksource: %s is already registered\n", clock->name); return; } printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n", clock->name, clock->mult, clock->shift); } static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt; set_dec(evt); return 0; } static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev) { if (mode != CLOCK_EVT_MODE_ONESHOT) decrementer_set_next_event(DECREMENTER_MAX, dev); } static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, int shift) { uint64_t tmp = ((uint64_t)ticks) << shift; do_div(tmp, nsec); return tmp; } static void __init setup_clockevent_multiplier(unsigned long hz) { u64 mult, shift = 32; while (1) { mult = div_sc64(hz, NSEC_PER_SEC, shift); if (mult && (mult >> 32UL) == 0UL) break; shift--; } decrementer_clockevent.shift = shift; decrementer_clockevent.mult = mult; } static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); clockevents_register_device(dec); } static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); setup_clockevent_multiplier(ppc_tb_freq); decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); 
decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); register_decrementer_clockevent(cpu); } void secondary_cpu_time_init(void) { /* Start the decrementer on CPUs that have manual control * such as BookE */ start_cpu_decrementer(); /* FIME: Should make unrelatred change to move snapshot_timebase * call here ! */ register_decrementer_clockevent(smp_processor_id()); } /* This function is only called on the boot processor */ void __init time_init(void) { struct div_result res; u64 scale; unsigned shift; if (__USE_RTC()) { /* 601 processor: dec counts down by 128 every 128ns */ ppc_tb_freq = 1000000000; } else { /* Normal PowerPC with timebase register */ ppc_md.calibrate_decr(); printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); } tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; calc_cputime_factors(); setup_cputime_one_jiffy(); /* * Compute scale factor for sched_clock. * The calibrate_decr() function has set tb_ticks_per_sec, * which is the timebase frequency. * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret * the 128-bit result as a 64.64 fixed-point number. * We then shift that number right until it is less than 1.0, * giving us the scale factor and shift count to use in * sched_clock(). 
*/ div128_by_32(1000000000, 0, tb_ticks_per_sec, &res); scale = res.result_low; for (shift = 0; res.result_high != 0; ++shift) { scale = (scale >> 1) | (res.result_high << 63); res.result_high >>= 1; } tb_to_ns_scale = scale; tb_to_ns_shift = shift; /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ boot_tb = get_tb_or_rtc(); /* If platform provided a timezone (pmac), we correct the time */ if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; } vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; /* Start the decrementer on CPUs that have manual control * such as BookE */ start_cpu_decrementer(); /* Register the clocksource, if we're not running on iSeries */ if (!firmware_has_feature(FW_FEATURE_ISERIES)) clocksource_init(); init_decrementer_clockevent(); } #define FEBRUARY 2 #define STARTOFTIME 1970 #define SECDAY 86400L #define SECYR (SECDAY * 365) #define leapyear(year) ((year) % 4 == 0 && \ ((year) % 100 != 0 || (year) % 400 == 0)) #define days_in_year(a) (leapyear(a) ? 366 : 365) #define days_in_month(a) (month_days[(a) - 1]) static int month_days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; /* * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) */ void GregorianDay(struct rtc_time * tm) { int leapsToDate; int lastYear; int day; int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; lastYear = tm->tm_year - 1; /* * Number of leap corrections to apply up to end of last year */ leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; /* * This year is a leap year if it is divisible by 4 except when it is * divisible by 100 unless it is divisible by 400 * * e.g. 
1904 was a leap year, 1900 was not, 1996 is, and 2000 was */ day = tm->tm_mon > 2 && leapyear(tm->tm_year); day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + tm->tm_mday; tm->tm_wday = day % 7; } void to_tm(int tim, struct rtc_time * tm) { register int i; register long hms, day; day = tim / SECDAY; hms = tim % SECDAY; /* Hours, minutes, seconds are easy */ tm->tm_hour = hms / 3600; tm->tm_min = (hms % 3600) / 60; tm->tm_sec = (hms % 3600) % 60; /* Number of years in days */ for (i = STARTOFTIME; day >= days_in_year(i); i++) day -= days_in_year(i); tm->tm_year = i; /* Number of months in days left */ if (leapyear(tm->tm_year)) days_in_month(FEBRUARY) = 29; for (i = 1; day >= days_in_month(i); i++) day -= days_in_month(i); days_in_month(FEBRUARY) = 28; tm->tm_mon = i; /* Days are what is left over (+1) from all that. */ tm->tm_mday = day + 1; /* * Determine the day of week */ GregorianDay(tm); } /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit * result. */ void div128_by_32(u64 dividend_high, u64 dividend_low, unsigned divisor, struct div_result *dr) { unsigned long a, b, c, d; unsigned long w, x, y, z; u64 ra, rb, rc; a = dividend_high >> 32; b = dividend_high & 0xffffffff; c = dividend_low >> 32; d = dividend_low & 0xffffffff; w = a / divisor; ra = ((u64)(a - (w * divisor)) << 32) + b; rb = ((u64) do_div(ra, divisor) << 32) + c; x = ra; rc = ((u64) do_div(rb, divisor) << 32) + d; y = rb; do_div(rc, divisor); z = rc; dr->result_high = ((u64)w << 32) + x; dr->result_low = ((u64)y << 32) + z; } /* We don't need to calibrate delay, we use the CPU timebase for that */ void calibrate_delay(void) { /* Some generic code (such as spinlock debug) use loops_per_jiffy * as the number of __delay(1) in a jiffy, so make it so */ loops_per_jiffy = tb_ticks_per_jiffy; } static int __init rtc_init(void) { struct platform_device *pdev; if (!ppc_md.get_rtc_time) return -ENODEV; pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); if 
(IS_ERR(pdev)) return PTR_ERR(pdev); return 0; } module_init(rtc_init);