summaryrefslogtreecommitdiffstats
path: root/unit_trace/trace_reader.py
diff options
context:
space:
mode:
authorMac Mollison <mollison@cs.unc.edu>2010-03-13 12:12:37 -0500
committerMac Mollison <mollison@cs.unc.edu>2010-03-13 12:12:37 -0500
commit122f457226f54ad23b7cd138512502e430e704dc (patch)
treefee0690936c3ae95255e559cd0fd09f0fa8c2ad4 /unit_trace/trace_reader.py
parent14a40b99735f09f6e70b8e897acbb622f9115ca3 (diff)
Further restructuring to create 'unit_trace' pkg
The unit_trace folder should be placed in /usr/local/lib/pythonX.Y/site-packages. This makes unit-trace submodules available from anywhere on the system.
Diffstat (limited to 'unit_trace/trace_reader.py')
-rw-r--r--unit_trace/trace_reader.py245
1 files changed, 245 insertions, 0 deletions
diff --git a/unit_trace/trace_reader.py b/unit_trace/trace_reader.py
new file mode 100644
index 0000000..a4ff964
--- /dev/null
+++ b/unit_trace/trace_reader.py
@@ -0,0 +1,245 @@
1###############################################################################
2# Description
3###############################################################################
4
5# trace_reader(files) returns an iterator which produces records
6# in order from the files given. (the param is a list of files.)
7#
8# Each record is just a Python object. It is guaranteed to have the following
9# attributes:
10# - 'pid': pid of the task
11# - 'job': job number for that task
12# - 'cpu', given by LITMUS
13# - 'when', given by LITMUS as a timestamp. LITMUS does not provide a
14# timestamp for all records. In this case, when is set to 0.
15# - 'type', a numerical value given by LITMUS
16# - 'type_name', a human-readable name defined in this module
17# - 'record_type', set to 'event' by this module (to distinguish from, e.g.,
18# error records produced elsewhere).
19# - Possible additional attributes, depending on the type of record.
20#
21# To find out exactly what attributes are set for each record type, look at
22# the trace-parsing information at the bottom of this file.
23
24###############################################################################
25# Imports
26###############################################################################
27
28import struct
29
30
31###############################################################################
32# Public functions
33###############################################################################
34
35# Generator function returning an iterable over records in a trace file.
# Generator function returning an iterable over records in a trace file.
def trace_reader(files):
    """Yield records from the given trace files, merged in time order.

    files -- list of per-CPU trace file names.

    The first record yielded is a 'meta' record carrying the number of
    CPUs (used by the G-EDF test); every following record is an 'event'
    parsed from one of the files.
    """

    # Yield a record indicating the number of CPUs, used by the G-EDF test
    class Obj: pass
    record = Obj()
    record.record_type = "meta"
    record.type_name = "num_cpus"
    record.num_cpus = len(files)
    yield record

    # Create an iterator for each file plus a sorted read-ahead buffer of
    # about 100 records. Records may have been logged slightly out of
    # order; buffering cannot guarantee globally ordered output, but it
    # makes it overwhelmingly probable.
    file_iters = []      # one record iterator per file
    file_iter_buff = []  # one read-ahead buffer per iterator
    for f in files:
        it = _get_file_iter(f)
        buff = []
        try:
            # Pre-fill with up to 101 records. The original code raised
            # StopIteration here for files with fewer than 101 records,
            # which silently terminated this generator after the meta
            # record; tolerate short and empty files instead.
            for _ in range(101):
                buff.append(next(it))
        except StopIteration:
            pass
        if buff:
            file_iters.append(it)
            file_iter_buff.append(sorted(buff, key=lambda rec: rec.when))

    # Remember the time of the last record. This way, we can make sure
    # records truly are produced in monotonically increasing order by time
    # and terminate fatally if they are not.
    last_time = None

    # Keep pulling records as long as at least one buffer remains
    while len(file_iter_buff) > 0:

        # Select the earliest record from those at the heads of the buffers
        earliest = None
        buff_to_refill = -1
        for x in range(len(file_iter_buff)):
            head = file_iter_buff[x][0]
            if earliest is None or head.when < earliest.when:
                earliest = head
                buff_to_refill = x

        # Take it out of the buffer
        del file_iter_buff[buff_to_refill][0]

        # Try to append a new record to the buffer (if there is another) and
        # then keep the buffer sorted
        try:
            file_iter_buff[buff_to_refill].append(next(file_iters[buff_to_refill]))
            file_iter_buff[buff_to_refill].sort(key=lambda rec: rec.when)

        # If there aren't any more records, fine. Unless the buffer is also
        # empty. If that is the case, delete the buffer.
        except StopIteration:
            if len(file_iter_buff[buff_to_refill]) < 1:
                del file_iter_buff[buff_to_refill]
                del file_iters[buff_to_refill]

        # Check for monotonically increasing time
        if last_time is not None and earliest.when < last_time:
            exit("FATAL ERROR: trace_reader.py: out-of-order record produced")
        else:
            last_time = earliest.when

        # Yield the record
        yield earliest
105
106###############################################################################
107# Private functions
108###############################################################################
109
110# Returns an iterator to pull records from a file
# Returns an iterator to pull records from a file
def _get_file_iter(file):
    """Yield one record object per binary record found in 'file'.

    Each yielded record gets a 'type_name', a 'record_type' of "event",
    and a 'when' timestamp (set to 0 when the record type carries none).
    Stops cleanly at end-of-file; exits the program on a corrupt record.
    """
    f = open(file, 'rb')
    try:
        while True:
            data = f.read(RECORD_HEAD_SIZE)
            try:
                type_num = struct.unpack_from('b', data)[0]
            except struct.error:
                break  # We read to the end of the file
            # 'rec_type' rather than 'type': don't shadow the builtin
            rec_type = _get_type(type_num)
            try:
                values = struct.unpack_from(StHeader.format +
                                            rec_type.format, data)
                record_dict = dict(zip(rec_type.keys, values))
            except struct.error:
                print("Invalid record detected, stopping.")
                exit()

            # Convert the record_dict into an object
            record = _dict2obj(record_dict)

            # Give it a type name (easier to work with than type number)
            record.type_name = _get_type_name(type_num)

            # All records should have a 'record type' field.
            # e.g. these are 'event's as opposed to 'error's
            record.record_type = "event"

            # If there is no timestamp, set the time to 0
            if 'when' not in record.__dict__:
                record.when = 0

            yield record
    finally:
        # The original leaked the file handle on the normal EOF path;
        # close it on every exit (EOF, corrupt record, caller abandon).
        f.close()
144
145# Convert a dict into an object
146def _dict2obj(d):
147 class Obj(object): pass
148 o = Obj()
149 for key in d.keys():
150 o.__dict__[key] = d[key]
151 return o
152
153###############################################################################
154# Trace record data types and accessor functions
155###############################################################################
156
# Each class below represents a type of event record. The format attribute
# specifies how to decode the binary record and the keys attribute
# specifies how to name the pieces of information decoded. Note that every
# on-disk record occupies 24 bytes in total and begins with a common 8-byte
# header, represented by the StHeader class.
162
163RECORD_HEAD_SIZE = 24
164
class StHeader:
    # Common prefix of every record: little-endian signed byte 'type',
    # signed byte 'cpu', 16-bit 'pid', 32-bit 'job' (8 bytes total).
    format = '<bbhi'
    formatStr = struct.Struct(format)  # pre-compiled header decoder
    keys = ['type','cpu','pid','job']
    message = 'The header.'
170
class StNameData:
    # Payload: a 16-byte fixed-width string.
    format = '16s'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['name']
    message = 'The name of the executable of this process.'
176
class StParamData:
    # Payload: three unsigned 32-bit ints plus one byte.
    format = 'IIIc'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['wcet','period','phase','partition']
    message = 'Regular parameters.'
182
class StReleaseData:
    # Payload: two unsigned 64-bit timestamps.
    format = 'QQ'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','deadline']
    message = 'A job was/is going to be released.'
188
189#Not yet used by Sched Trace
class StAssignedData:
    # Payload: unsigned 64-bit timestamp plus one byte (target CPU).
    format = 'Qc'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','target']
    message = 'A job was assigned to a CPU.'
195
class StSwitchToData:
    # Payload: unsigned 64-bit timestamp plus unsigned 32-bit exec time.
    format = 'QI'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','exec_time']
    message = 'A process was switched to on a given CPU.'
201
class StSwitchAwayData:
    # Payload: unsigned 64-bit timestamp plus unsigned 32-bit exec time.
    format = 'QI'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','exec_time']
    message = 'A process was switched away on a given CPU.'
207
class StCompletionData:
    # Payload: unsigned 64-bit timestamp, 3 pad bytes, then two single
    # bytes ('forced?' flag and 'flags'). An earlier decoding used a bool:
    #format = 'Q3x?c'
    format = 'Q3xcc'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','forced?','flags']
    message = 'A job completed.'
214
class StBlockData:
    # Payload: a single unsigned 64-bit timestamp.
    format = 'Q'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when']
    message = 'A task blocks.'
220
class StResumeData:
    # Payload: a single unsigned 64-bit timestamp.
    format = 'Q'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when']
    message = 'A task resumes.'
226
class StSysReleaseData:
    # Payload: two unsigned 64-bit timestamps.
    format = 'QQ'
    formatStr = struct.Struct(StHeader.format + format)
    keys = StHeader.keys + ['when','release']
    message = 'All tasks have checked in, task system released by user'
232
233# Return the binary data type, given the type_num
# Return the binary data type, given the type_num.
# Index 0 is unused (there is no record type 0).
def _get_type(type_num):
    return (None, StNameData, StParamData, StReleaseData, StAssignedData,
            StSwitchToData, StSwitchAwayData, StCompletionData, StBlockData,
            StResumeData, StSysReleaseData)[type_num]
239
240# Return the type name, given the type_num (this is simply a convenience to
241# programmers of other modules)
242def _get_type_name(type_num):
243 type_names = [None,"name","params","release","assign","switch_to",
244 "switch_away","completion","block","resume","sys_release"]
245 return type_names[type_num]