Implemented correct ordering for trace reader and a test suite

- Trace reader reads in records from all files "in parallel", so they come out correctly ordered by timestamp
- runtests.py has unit tests in it for testing the program
- Added sample_traces as part of the test suite
author: Mac Mollison <mollison@cs.unc.edu> 2010-02-08 01:59:10 -0500
committer: Mac Mollison <mollison@cs.unc.edu> 2010-02-08 01:59:10 -0500
commit: 4f8a5a5727346b73a3ccd8e3120db8193de34960 (patch)
tree: 356122e5e6309733ccf3c285c319d5d94a495532
parent: 1016a3e271faebb899766f5d18468dd88b4d84b7 (diff)
10 files changed, 127 insertions, 28 deletions
diff --git a/TODO b/TODO
index 41dc878..e69de29 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +0,0 @@
- Currently, trace.py reads all records from the first trace file, then all
-    records from the second, etc. Instead it should progress through all
-    files simultaneously, producing records in order based on timestamp.
diff --git a/run.py b/run.py
index 42def18..ca13708 100755
--- a/run.py
+++ b/run.py
@@ -20,10 +20,10 @@ import text_print
 ###############################################################################
 g4 = [
-'/home/mollison/old/sta/traces/st-g4-0.bin',
+'./sample_traces/st-g4-0.bin',
-'/home/mollison/old/sta/traces/st-g4-1.bin',
+'./sample_traces/st-g4-1.bin',
-'/home/mollison/old/sta/traces/st-g4-2.bin',
+'./sample_traces/st-g4-2.bin',
-'/home/mollison/old/sta/traces/st-g4-3.bin'
+'./sample_traces/st-g4-3.bin',
 ]
 ###############################################################################
diff --git a/runtests.py b/runtests.py
new file mode 100755
index 0000000..dbbc9a9
--- /dev/null
+++ b/runtests.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+###############################################################################
+# Description
+###############################################################################
+# Unit Tests
+###############################################################################
+# Imports
+###############################################################################
+import trace
+import os
+###############################################################################
+# Trace files
+###############################################################################
+files = [
+'./sample_traces/st-g4-0.bin',
+'./sample_traces/st-g4-1.bin',
+'./sample_traces/st-g4-2.bin',
+'./sample_traces/st-g4-3.bin',
+]
+###############################################################################
+# Tests
+###############################################################################
+# Does the trace reader sort files by time correctly?
+def test1():
+    stream = trace.get_trace_record_stream(files)
+    last_time = 0
+    for item in stream:
+        if last_time > item.when:
+            return "[FAIL]"
+        last_time = item.when
+    return "[SUCCESS]"
+print("Test 1: {}".format(test1()))
diff --git a/sample_traces/.runtests.py.swp b/sample_traces/.runtests.py.swp
new file mode 100644
index 0000000..d9a9acd
--- /dev/null
+++ b/sample_traces/.runtests.py.swp
Binary files differ
diff --git a/sample_traces/st-g4-0.bin b/sample_traces/st-g4-0.bin
new file mode 100644
index 0000000..c52ed7b
--- /dev/null
+++ b/sample_traces/st-g4-0.bin
Binary files differ
diff --git a/sample_traces/st-g4-1.bin b/sample_traces/st-g4-1.bin
new file mode 100644
index 0000000..77fea28
--- /dev/null
+++ b/sample_traces/st-g4-1.bin
Binary files differ
diff --git a/sample_traces/st-g4-2.bin b/sample_traces/st-g4-2.bin
new file mode 100644
index 0000000..4d5d3cd
--- /dev/null
+++ b/sample_traces/st-g4-2.bin
Binary files differ
diff --git a/sample_traces/st-g4-3.bin b/sample_traces/st-g4-3.bin
new file mode 100644
index 0000000..1cd7ee4
--- /dev/null
+++ b/sample_traces/st-g4-3.bin
Binary files differ
diff --git a/text_print.py b/text_print.py
index 7254b51..98e91fa 100644
--- a/text_print.py
+++ b/text_print.py
@@ -21,3 +21,4 @@ def print_stream(stream):
 def _print_event(record):
    print("Job: {}.{}".format(record.pid,record.job))
    print("Type: {}".format(record.type_name))
+    print("Time: {}".format(record.when))
diff --git a/trace.py b/trace.py
index 1ceccae..a0a32ce 100644
--- a/trace.py
+++ b/trace.py
@@ -19,32 +19,89 @@ import struct
 # Generator function returning an iterable over records in a trace file.
 def get_trace_record_stream(files):
+    # Create iterators for each file and a buffer to store records in
+    file_iters = [] # file iterators
+    file_iter_buff = [] # file iterator buffers
    for file in files:
-        with open(file,'rb') as f:
+        file_iter = _get_file_iter(file)
-            while True:
+        file_iters.append(_get_file_iter(file))
-                data = f.read(24)
+        file_iter_buff.append([next(file_iter)])
-                try:
-                    type_num = struct.unpack_from('b',data)[0]
+    # We keep 100 records in each buffer and then keep the buffer sorted
-                except struct.error: 
+    # This is because records may be recorded slightly out of order
-                    break #We read to the end of the file
+    for x in range(0,len(file_iter_buff)):
-                type = _get_type(type_num)
+        for y in range(0,100):
-                try:
+            file_iter_buff[x].append(next(file_iters[x])) 
-                    values = struct.unpack_from(StHeader.format + 
+    for x in range(0,len(file_iter_buff)):
-                        type.format,data)
+        file_iter_buff[x] = sorted(file_iter_buff[x],key=lambda rec: rec.when)
-                    record_dict = dict(zip(type.keys,values))
-                except struct.error:
+    # Keep pulling records as long as we have a buffer
-                    f.close()
+    while len(file_iter_buff) > 0:
-                    print("Invalid record detected, stopping.")
-                    exit()
+        # Select the earliest record from those at the heads of the buffers
-                Record = _dict2obj(record_dict)
+        earliest = -1
-                Record.type_name = _get_type_name(type_num)
+        buff_to_refill = -1
-                Record.record_type = "event"
+        for x in range(0,len(file_iter_buff)):
-                yield Record
+            if earliest==-1 or file_iter_buff[x][0].when < earliest.when:
+                earliest = file_iter_buff[x][0]
+                buff_to_refill = x
+        # Take it out of the buffer
+        del file_iter_buff[buff_to_refill][0]
+        # Try to append a new record to the buffer (if there is another) and
+        #     then keep the buffer sorted
+        try:
+            file_iter_buff[buff_to_refill].append(next(file_iters[buff_to_refill]))
+            file_iter_buff[buff_to_refill] = sorted(file_iter_buff[buff_to_refill],
+                key=lambda rec: rec.when)
+        # If there aren't any more records, fine. Unless the buffer is also empty.
+        # If that is the case, delete the buffer.
+        except StopIteration:
+            if len(file_iter_buff[buff_to_refill]) < 1:
+                del file_iter_buff[buff_to_refill]
+                del file_iters[buff_to_refill]
+        # Yield the record
+        yield earliest 
 ###############################################################################
 # Private functions
 ###############################################################################
+# Returns an iterator to pull records from a file
+def _get_file_iter(file):
+    with open(file,'rb') as f:
+        while True:
+            data = f.read(RECORD_HEAD_SIZE)
+            try:
+                type_num = struct.unpack_from('b',data)[0]
+            except struct.error: 
+                break #We read to the end of the file
+            type = _get_type(type_num)
+            try:
+                values = struct.unpack_from(StHeader.format + 
+                    type.format,data)
+                record_dict = dict(zip(type.keys,values))
+            except struct.error:
+                f.close()
+                print("Invalid record detected, stopping.")
+                exit()
+            record = _dict2obj(record_dict)
+            record.type_name = _get_type_name(type_num)
+            # All records should have a 'record type' field.
+            # e.g. these are 'event's as opposed to 'error's
+            record.record_type = "event"
+            # If there is no timestamp, set the time to 0
+            if 'when' not in record.__dict__.keys():
+                record.when = 0
+            yield record
 # Convert a dict into an object
 def _dict2obj(d):
    class Obj: pass
@@ -57,6 +114,8 @@ def _dict2obj(d):
 # Trace record data types and accessor functions
 ###############################################################################
+RECORD_HEAD_SIZE = 24
 class StHeader:
    format =  '<bbhi'
    formatStr = struct.Struct(format)
@@ -78,7 +137,7 @@ class StParamData:
 class StReleaseData:
    format =  'QQ'
    formatStr = struct.Struct(StHeader.format + format)
-    keys = StHeader.keys + ['release_time','deadline']
+    keys = StHeader.keys + ['when','deadline']
    message = 'A job was/is going to be released.'
 #Not yet used by Sched Trace
author	Mac Mollison <mollison@cs.unc.edu>	2010-02-08 01:59:10 -0500
committer	Mac Mollison <mollison@cs.unc.edu>	2010-02-08 01:59:10 -0500
commit	4f8a5a5727346b73a3ccd8e3120db8193de34960 (patch)
tree	356122e5e6309733ccf3c285c319d5d94a495532
parent	1016a3e271faebb899766f5d18468dd88b4d84b7 (diff)

diff --git a/TODO b/TODO
index 41dc878..e69de29 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +0,0 @@
1	- Currently, trace.py reads all records from the first trace file, then all
2	records from the second, etc. Instead it should progress through all
3	files simultaneously, producing records in order based on timestamp.


diff --git a/run.py b/run.py
index 42def18..ca13708 100755
--- a/run.py
+++ b/run.py
@@ -20,10 +20,10 @@ import text_print
20	###############################################################################	20	###############################################################################
21		21
22	g4 = [	22	g4 = [
23	'/home/mollison/old/sta/traces/st-g4-0.bin',	23	'./sample_traces/st-g4-0.bin',
24	'/home/mollison/old/sta/traces/st-g4-1.bin',	24	'./sample_traces/st-g4-1.bin',
25	'/home/mollison/old/sta/traces/st-g4-2.bin',	25	'./sample_traces/st-g4-2.bin',
26	'/home/mollison/old/sta/traces/st-g4-3.bin'	26	'./sample_traces/st-g4-3.bin',
27	]	27	]
28		28
29	###############################################################################	29	###############################################################################


diff --git a/runtests.py b/runtests.py
new file mode 100755
index 0000000..dbbc9a9
--- /dev/null
+++ b/runtests.py
@@ -0,0 +1,42 @@
		1	#!/usr/bin/env python3
		2
		3	###############################################################################
		4	# Description
		5	###############################################################################
		6
		7	# Unit Tests
		8
		9
		10	###############################################################################
		11	# Imports
		12	###############################################################################
		13
		14	import trace
		15	import os
		16
		17	###############################################################################
		18	# Trace files
		19	###############################################################################
		20
		21	files = [
		22	'./sample_traces/st-g4-0.bin',
		23	'./sample_traces/st-g4-1.bin',
		24	'./sample_traces/st-g4-2.bin',
		25	'./sample_traces/st-g4-3.bin',
		26	]
		27
		28	###############################################################################
		29	# Tests
		30	###############################################################################
		31
		32	# Does the trace reader sort files by time correctly?
		33	def test1():
		34	stream = trace.get_trace_record_stream(files)
		35	last_time = 0
		36	for item in stream:
		37	if last_time > item.when:
		38	return "[FAIL]"
		39	last_time = item.when
		40	return "[SUCCESS]"
		41
		42	print("Test 1: {}".format(test1()))


diff --git a/sample_traces/.runtests.py.swp b/sample_traces/.runtests.py.swp
new file mode 100644
index 0000000..d9a9acd
--- /dev/null
+++ b/sample_traces/.runtests.py.swp
Binary files differ


diff --git a/sample_traces/st-g4-0.bin b/sample_traces/st-g4-0.bin
new file mode 100644
index 0000000..c52ed7b
--- /dev/null
+++ b/sample_traces/st-g4-0.bin
Binary files differ


diff --git a/sample_traces/st-g4-1.bin b/sample_traces/st-g4-1.bin
new file mode 100644
index 0000000..77fea28
--- /dev/null
+++ b/sample_traces/st-g4-1.bin
Binary files differ


diff --git a/sample_traces/st-g4-2.bin b/sample_traces/st-g4-2.bin
new file mode 100644
index 0000000..4d5d3cd
--- /dev/null
+++ b/sample_traces/st-g4-2.bin
Binary files differ


diff --git a/sample_traces/st-g4-3.bin b/sample_traces/st-g4-3.bin
new file mode 100644
index 0000000..1cd7ee4
--- /dev/null
+++ b/sample_traces/st-g4-3.bin
Binary files differ


diff --git a/text_print.py b/text_print.py
index 7254b51..98e91fa 100644
--- a/text_print.py
+++ b/text_print.py
@@ -21,3 +21,4 @@ def print_stream(stream):
21	def _print_event(record):	21	def _print_event(record):
22	print("Job: {}.{}".format(record.pid,record.job))	22	print("Job: {}.{}".format(record.pid,record.job))
23	print("Type: {}".format(record.type_name))	23	print("Type: {}".format(record.type_name))
		24	print("Time: {}".format(record.when))


diff --git a/trace.py b/trace.py
index 1ceccae..a0a32ce 100644
--- a/trace.py
+++ b/trace.py
@@ -19,32 +19,89 @@ import struct
19		19
20	# Generator function returning an iterable over records in a trace file.	20	# Generator function returning an iterable over records in a trace file.
21	def get_trace_record_stream(files):	21	def get_trace_record_stream(files):
		22
		23	# Create iterators for each file and a buffer to store records in
		24	file_iters = [] # file iterators
		25	file_iter_buff = [] # file iterator buffers
22	for file in files:	26	for file in files:
23	with open(file,'rb') as f:	27	file_iter = _get_file_iter(file)
24	while True:	28	file_iters.append(_get_file_iter(file))
25	data = f.read(24)	29	file_iter_buff.append([next(file_iter)])
26	try:	30
27	type_num = struct.unpack_from('b',data)[0]	31	# We keep 100 records in each buffer and then keep the buffer sorted
28	except struct.error:	32	# This is because records may be recorded slightly out of order
29	break #We read to the end of the file	33	for x in range(0,len(file_iter_buff)):
30	type = _get_type(type_num)	34	for y in range(0,100):
31	try:	35	file_iter_buff[x].append(next(file_iters[x]))
32	values = struct.unpack_from(StHeader.format +	36	for x in range(0,len(file_iter_buff)):
33	type.format,data)	37	file_iter_buff[x] = sorted(file_iter_buff[x],key=lambda rec: rec.when)
34	record_dict = dict(zip(type.keys,values))	38
35	except struct.error:	39	# Keep pulling records as long as we have a buffer
36	f.close()	40	while len(file_iter_buff) > 0:
37	print("Invalid record detected, stopping.")	41
38	exit()	42	# Select the earliest record from those at the heads of the buffers
39	Record = _dict2obj(record_dict)	43	earliest = -1
40	Record.type_name = _get_type_name(type_num)	44	buff_to_refill = -1
41	Record.record_type = "event"	45	for x in range(0,len(file_iter_buff)):
42	yield Record	46	if earliest==-1 or file_iter_buff[x][0].when < earliest.when:
		47	earliest = file_iter_buff[x][0]
		48	buff_to_refill = x
		49
		50	# Take it out of the buffer
		51	del file_iter_buff[buff_to_refill][0]
		52
		53	# Try to append a new record to the buffer (if there is another) and
		54	# then keep the buffer sorted
		55	try:
		56	file_iter_buff[buff_to_refill].append(next(file_iters[buff_to_refill]))
		57	file_iter_buff[buff_to_refill] = sorted(file_iter_buff[buff_to_refill],
		58	key=lambda rec: rec.when)
		59
		60	# If there aren't any more records, fine. Unless the buffer is also empty.
		61	# If that is the case, delete the buffer.
		62	except StopIteration:
		63	if len(file_iter_buff[buff_to_refill]) < 1:
		64	del file_iter_buff[buff_to_refill]
		65	del file_iters[buff_to_refill]
		66
		67	# Yield the record
		68	yield earliest
43		69
44	###############################################################################	70	###############################################################################
45	# Private functions	71	# Private functions
46	###############################################################################	72	###############################################################################
47		73
		74	# Returns an iterator to pull records from a file
		75	def _get_file_iter(file):
		76	with open(file,'rb') as f:
		77	while True:
		78	data = f.read(RECORD_HEAD_SIZE)
		79	try:
		80	type_num = struct.unpack_from('b',data)[0]
		81	except struct.error:
		82	break #We read to the end of the file
		83	type = _get_type(type_num)
		84	try:
		85	values = struct.unpack_from(StHeader.format +
		86	type.format,data)
		87	record_dict = dict(zip(type.keys,values))
		88	except struct.error:
		89	f.close()
		90	print("Invalid record detected, stopping.")
		91	exit()
		92	record = _dict2obj(record_dict)
		93	record.type_name = _get_type_name(type_num)
		94
		95	# All records should have a 'record type' field.
		96	# e.g. these are 'event's as opposed to 'error's
		97	record.record_type = "event"
		98
		99	# If there is no timestamp, set the time to 0
		100	if 'when' not in record.__dict__.keys():
		101	record.when = 0
		102
		103	yield record
		104
48	# Convert a dict into an object	105	# Convert a dict into an object
49	def _dict2obj(d):	106	def _dict2obj(d):
50	class Obj: pass	107	class Obj: pass
@@ -57,6 +114,8 @@ def _dict2obj(d):
57	# Trace record data types and accessor functions	114	# Trace record data types and accessor functions
58	###############################################################################	115	###############################################################################
59		116
		117	RECORD_HEAD_SIZE = 24
		118
60	class StHeader:	119	class StHeader:
61	format = '<bbhi'	120	format = '<bbhi'
62	formatStr = struct.Struct(format)	121	formatStr = struct.Struct(format)
@@ -78,7 +137,7 @@ class StParamData:
78	class StReleaseData:	137	class StReleaseData:
79	format = 'QQ'	138	format = 'QQ'
80	formatStr = struct.Struct(StHeader.format + format)	139	formatStr = struct.Struct(StHeader.format + format)
81	keys = StHeader.keys + ['release_time','deadline']	140	keys = StHeader.keys + ['when','deadline']
82	message = 'A job was/is going to be released.'	141	message = 'A job was/is going to be released.'
83		142
84	#Not yet used by Sched Trace	143	#Not yet used by Sched Trace