summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjoern B. Brandenburg <bbb@cs.unc.edu>2011-02-21 15:14:15 -0500
committerBjoern B. Brandenburg <bbb@cs.unc.edu>2011-02-21 15:14:15 -0500
commit95ce3338ad1208ee44f334b87fea2c32a7b888d4 (patch)
tree750eed2557a482e506fc620e34133d136ee7c45d
parent60f818697e2cee718c75e78999945fe2b628c72b (diff)
add scripts for shuffling and truncating sample files
-rwxr-xr-xcount-all17
-rwxr-xr-xshuffle-truncate-all18
-rwxr-xr-xshuffle_truncate.py102
3 files changed, 137 insertions, 0 deletions
diff --git a/count-all b/count-all
new file mode 100755
index 0000000..7a353bc
--- /dev/null
+++ b/count-all
@@ -0,0 +1,17 @@
#!/bin/bash
#
# count-all: report the number of samples in every overhead sample file,
# grouped by event type.
#
# Usage: count-all [DIR]
#
# DIR defaults to the current directory. For each event E listed in EVENTS,
# all files below DIR whose name matches "*overhead=E.bin" (case-insensitive)
# are passed to "shuffle_truncate.py --count", and that command's output is
# written to counts_overhead=E.txt in the current working directory.

EVENTS="SCHED SCHED2 TICK CXS RELEASE RELEASE-LATENCY SEND-RESCHED"

DIR="$1"

if [ -z "$DIR" ]
then
    DIR=.
fi

for E in $EVENTS
do
    # Collect the matches NUL-delimited into an array. Expanding the raw
    # find output unquoted would split paths on whitespace and expand any
    # glob characters they contain.
    FILES=()
    while IFS= read -r -d '' F
    do
        FILES+=("$F")
    done < <(find "$DIR" -iname "*overhead=${E}.bin" -print0)

    shuffle_truncate.py --count "${FILES[@]}" > "counts_overhead=${E}.txt"
done
17
diff --git a/shuffle-truncate-all b/shuffle-truncate-all
new file mode 100755
index 0000000..b83c360
--- /dev/null
+++ b/shuffle-truncate-all
@@ -0,0 +1,18 @@
#!/bin/bash
#
# shuffle-truncate-all: shuffle and truncate the sample files of each event
# type to a per-event cut-off.
#
# Usage: shuffle-truncate-all COUNTS [DIR]
#
# COUNTS is a text file in which each line starts with two
# whitespace-separated fields: an event name and the cut-off (number of
# samples) to use for that event. DIR defaults to the current directory.
# For each event E, all files below DIR matching "*overhead=E.bin"
# (case-insensitive) are passed to "shuffle_truncate.py -c CUTOFF".

COUNTS="$1"
DIR="$2"

if [ -z "$COUNTS" ]
then
    echo "Usage: $0 COUNTS [DIR]" >&2
    exit 1
fi

if [ -z "$DIR" ]
then
    DIR=.
fi

# Take the event name and its cut-off from the same line. Looking the
# cut-off up with grep matches substrings (e.g. SCHED also matches SCHED2
# and SEND-RESCHED), which yields several cut-offs for one event.
# The file is read on descriptor 3 so the commands in the loop body keep
# their normal stdin. The "|| [ -n ... ]" part handles a final line that
# lacks a trailing newline.
while read -r -u 3 E CUTOFF _ || [ -n "$E" ]
do
    # Skip blank lines.
    if [ -z "$E" ]
    then
        continue
    fi

    if [ -z "$CUTOFF" ]
    then
        echo "$0: no cut-off given for event '$E', skipping" >&2
        continue
    fi

    # Collect the matches NUL-delimited so that paths containing whitespace
    # or glob characters are passed on intact.
    FILES=()
    while IFS= read -r -d '' F
    do
        FILES+=("$F")
    done < <(find "$DIR" -iname "*overhead=${E}.bin" -print0)

    shuffle_truncate.py -c "$CUTOFF" "${FILES[@]}"
done 3< "$COUNTS"
diff --git a/shuffle_truncate.py b/shuffle_truncate.py
new file mode 100755
index 0000000..6a48ca2
--- /dev/null
+++ b/shuffle_truncate.py
@@ -0,0 +1,102 @@
1#!/usr/bin/env python
2
3import numpy
4import os
5import sys
6import optparse
7
8from util import load_binary_file
9
# Short alias for the option factory; kept as a module-level name.
o = optparse.make_option

# Command-line options understood by this script.
opts = [
    # Upper bound on the number of samples kept per file.
    o('-c', '--cut-off',
      action='store',
      type='int',
      dest='cutoff',
      help='max number of samples to use'),

    # Reporting mode: only count samples.
    o('--count',
      action='store_true',
      dest='count',
      help='just report the number of samples in each file'),
]

# Values used for options that do not appear on the command line.
defaults = dict(cutoff=None, count=False)

# Parsed options; assigned when the script is run as a program.
options = None
27
def load_files(fnames):
    """Load every file named in fnames.

    Each name is passed to load_binary_file; the results are returned as a
    list in the same order as the names.
    """
    loaded = []
    for path in fnames:
        loaded.append(load_binary_file(path))
    return loaded
30
def shuffle_truncate(arrays, fnames, target_length=None):
    """Shuffle over-long arrays in place and cut each one to a common length.

    Parameters:
      arrays        -- list of sample arrays; arrays longer than the chosen
                       length are shuffled IN PLACE with numpy.random.shuffle.
      fnames        -- names corresponding to arrays, used only in the
                       progress messages.
      target_length -- number of samples to keep from each array. If it is
                       None (or any other false value, such as 0), the
                       length of the shortest array is used instead.

    Returns a list holding the first `shortest` items of each array, in the
    same order as arrays. An array that is not longer than the chosen length
    is neither shuffled nor shortened. An empty arrays list without a
    target_length yields an empty list.
    """
    # Determine how many samples we can use.
    if target_length:
        shortest = target_length
    elif len(arrays) == 0:
        # Nothing to select from; min() would raise ValueError on no input.
        return []
    else:
        shortest = min(len(a) for a in arrays)
    print("Selecting %d samples from each data file." % shortest)

    # Make sure we'll select samples from all parts of the data file:
    # taking a prefix is only a fair selection if the order is uniformly
    # random. Arrays used in full do not need to be shuffled.
    for a, n in zip(arrays, fnames):
        if len(a) > shortest:
            print("Shuffling %s ..." % n)
            numpy.random.shuffle(a)

    # Now select the same number of samples from each file.
    return [a[:shortest] for a in arrays]
51
def store_files(arrays, fnames):
    """Write each array to the file with the corresponding name.

    Parameters:
      arrays -- arrays to store; each is written with its tofile() method.
      fnames -- output paths, paired with arrays by position.

    Existing files are overwritten (they are opened in 'wb' mode).
    """
    for a, fn in zip(arrays, fnames):
        print('Storing %s.' % fn)
        # The with-block closes the file even if tofile() raises.
        with open(fn, 'wb') as fd:
            a.tofile(fd)
58
def target_file(fname, want_ext):
    """Return the output file name for the input path fname.

    The directory part of fname is dropped. If want_ext is not None, the
    extension of the remaining name is replaced by want_ext (given without
    the leading dot); otherwise the base name is returned unchanged.
    """
    base = os.path.basename(fname)
    if want_ext is None:
        return base
    stem = os.path.splitext(base)[0]
    return "%s.%s" % (stem, want_ext)
66
def shuffle_truncate_store(files, cutoff=None, ext='sbn'):
    """Load the given files, shuffle/truncate them together, store results.

    Parameters:
      files  -- paths of the input files.
      cutoff -- passed to shuffle_truncate as target_length.
      ext    -- extension given to the output files (see target_file).

    The output names are derived with target_file, which drops the directory
    part of each input path.
    """
    samples = load_files(files)
    shortened = shuffle_truncate(samples, files, target_length=cutoff)
    store_files(shortened, [target_file(path, ext) for path in files])
72
def shuffle_truncate_store_individually(files, cutoff):
    """Shuffle, truncate and store each file on its own.

    Parameters:
      files  -- paths of the input files.
      cutoff -- cut-off passed to shuffle_truncate_store for every file.

    Each file is processed by a separate shuffle_truncate_store call, after
    a "[i/total] name" progress line has been printed.
    """
    total = len(files)
    # Zero-pad the running index to the width of the total count.
    fmt = "%%0%dd" % len(str(total))
    for i, f in enumerate(files):
        # The whole formatted string is the single argument of print, so
        # this line is valid both as a Python 2 statement and a Python 3 call.
        print(("[" + fmt + "/%d] %s") % (i + 1, total, os.path.basename(f)))
        # Flush so the progress line shows up before the work starts.
        sys.stdout.flush()
        shuffle_truncate_store([f], cutoff=cutoff)
80
def report_sample_counts(files):
    """Print the number of samples in each file.

    For every path in files, the file is loaded with load_binary_file and a
    line of the form "[i/total] <count> <path>" is printed.
    """
    total = len(files)
    # Zero-pad the running index to the width of the total count.
    fmt = "%%0%dd" % len(str(total))
    for i, f in enumerate(files):
        d = load_binary_file(f)
        # The whole formatted string is the single argument of print, so
        # this line is valid both as a Python 2 statement and a Python 3 call.
        print(("[" + fmt + "/%d] %8d %s") % (i + 1, total, len(d), f))
        sys.stdout.flush()
        # Drop the reference before the next file is loaded.
        del d
88
if __name__ == '__main__':
    # Command-line entry point. Modes, in order of precedence:
    #   --count      report the number of samples in each file
    #   -c N (N > 0) shuffle/truncate each file individually to N samples
    #   otherwise    shuffle/truncate all files to the length of the
    #                shortest one
    parser = optparse.OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    (options, files) = parser.parse_args()

    if not files:
        # Report misuse on stderr with a failing exit status, so the text
        # cannot be captured as data when stdout is redirected.
        sys.stderr.write("Usage: shuffle_truncate.py "
                         "data1.bin data2.bin data3.bin ...\n")
        sys.exit(1)
    elif options.count:
        report_sample_counts(files)
    elif options.cutoff:
        shuffle_truncate_store_individually(files, options.cutoff)
    else:
        shuffle_truncate_store(files)