author    Bjoern Brandenburg <bbb@mpi-sws.org>    2016-03-28 12:10:58 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>    2016-03-28 12:13:28 -0400
commit    1f39b952832779cf7626afb35f720f485f4787b4 (patch)
tree      0c065f7563ed04a17b07e08d0e4938c5f802ed11
parent    2f0330ee24d4fa827cfd56e16bd3ae34b703a9ed (diff)
Add tools for overhead-processing workflow
-rwxr-xr-x   ft-combine-samples     89
-rwxr-xr-x   ft-compute-stats      135
-rwxr-xr-x   ft-count-samples       50
-rwxr-xr-x   ft-extract-samples     75
-rwxr-xr-x   ft-select-samples      62
-rwxr-xr-x   ft-shuffle-truncate   136
-rwxr-xr-x   ft-sort-traces         48
7 files changed, 595 insertions(+), 0 deletions(-)
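
Taken together, the seven scripts form a pipeline from raw Feather-Trace binaries to summary statistics. The sketch below shows one plausible way to chain them; all file names (overheads_*.bin, counts.csv, stats.csv) are hypothetical, and the ftsort and ft2csv binaries from feather-trace-tools are assumed to be in the PATH or next to these scripts.

    ft-sort-traces overheads_*.bin                       # sort raw trace files with ftsort
    ft-extract-samples overheads_*.bin                   # ft2csv -r per event -> *_overhead=<EVENT>.float32
    ft-count-samples *_overhead=*.float32 > counts.csv   # minimum sample count per overhead type
    ft-select-samples counts.csv *_overhead=*.float32    # shuffle + truncate -> *.sf32
    ft-combine-samples --std *_overhead=*.sf32           # strip n=, cpu=, msg=, seq=, u= and merge -> combined-*
    ft-compute-stats -p 2128.0 combined-*.sf32 > stats.csv   # 2128.0 cycles/usec is a made-up example value
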
diff --git a/ft-combine-samples b/ft-combine-samples
new file mode 100755
index 0000000..a1f19ed
--- /dev/null
+++ b/ft-combine-samples
@@ -0,0 +1,89 @@
#!/bin/bash

STRIP_CMD=""

function add_strip()
{
    TAG=$1
    STRIP_CMD="$STRIP_CMD -e s/_${TAG}=[^_]*//"
}

while true
do
    case "$1" in
        -n | --task-count)
            shift
            add_strip n
            ;;

        -c | --cpu)
            shift
            add_strip cpu
            ;;

        -m | --msg)
            shift
            add_strip msg
            ;;

        -s | --seq)
            shift
            add_strip seq
            ;;

        -u | --util)
            shift
            add_strip u
            ;;

        -l | --locks)
            shift
            add_strip locks
            ;;

        -x | --custom)
            shift
            add_strip $1
            shift
            ;;

        --std)
            shift
            add_strip n
            add_strip cpu
            add_strip msg
            add_strip seq
            add_strip u
            ;;

        *)
            break
            ;;
    esac
done

if [ -z "$STRIP_CMD" ]
then
    echo "Error: no fields to strip specified."
    exit 1
fi

function do_append() {
    TARGET=`basename $1 | sed $STRIP_CMD`
    TARGET="combined-$TARGET"
    printf "\n[$NUM/$TOTAL] Combining $1 -> $TARGET\n"
    cat $1 >> $TARGET
}

TOTAL=$#
NUM=0

echo "File names will be mangled with: sed $STRIP_CMD"


while [ "" != "$*" ]
do
    NUM=$((NUM + 1))
    do_append "$1"
    shift
done
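
For illustration, here is how ft-combine-samples might be invoked; the sample file names are hypothetical. With --std, the n=, cpu=, msg=, seq=, and u= fields are stripped from each file name, and all files that collapse to the same mangled name are concatenated into one combined-* file.

    # Two hypothetical per-task-count sample files for the same scheduler and event:
    #   scheduler=GSN-EDF_n=10_u=0.5_overhead=SCHED.sf32
    #   scheduler=GSN-EDF_n=20_u=0.9_overhead=SCHED.sf32
    ft-combine-samples --std scheduler=GSN-EDF_*_overhead=SCHED.sf32
    # Both are appended to: combined-scheduler=GSN-EDF_overhead=SCHED.sf32

    # -x/--custom strips an arbitrary (non-leading) key, e.g. a hypothetical host= tag:
    ft-combine-samples --std -x host scheduler=*_host=*_overhead=*.sf32
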
diff --git a/ft-compute-stats b/ft-compute-stats
new file mode 100755
index 0000000..e07fd16
--- /dev/null
+++ b/ft-compute-stats
@@ -0,0 +1,135 @@
#!/usr/bin/env python

from __future__ import division

import numpy

import optparse
import sys
import os

from os.path import splitext


def decode_key_value_filename(name):
    "Map key=value_otherkey=other-value names to proper dictionary."
    params = {}
    parts = name.split('_')
    for p in parts:
        kv = p.split('=')
        k = kv[0]
        v = kv[1] if len(kv) > 1 else None
        params[k] = v
    return params


def stats_for_file(fname, scale):
    n = 0
    max = 0
    p95 = 0
    p99 = 0
    p999 = 0
    min = 0
    med = 0
    avg = 0
    std = 0
    var = 0

    size = os.stat(fname).st_size
    if size:
        samples = numpy.memmap(fname, dtype='float32', mode='c')

        n = len(samples)
        if n > 0:
            samples *= scale
            max = numpy.amax(samples)
            p95 = numpy.percentile(samples, 95.0)
            p99 = numpy.percentile(samples, 99.0)
            p999 = numpy.percentile(samples, 99.9)
            med = numpy.median(samples)
            avg = numpy.mean(samples)
            min = numpy.amin(samples)

            std = numpy.std(samples, ddof=1)
            var = numpy.var(samples)

    return [n, max, p999, p99, p95, avg, med, min, std, var]

o = optparse.make_option

opts = [
    o('-p', '--cycles-per-usec', action='store', dest='cycles', type='float',
      help='how many cycles per usec'),
    ]

defaults = {
    'cycles' : None,
    }

options = None

def fmt_cell(x):
    if type(x) == str:
        return "%25s" % x
    if type(x) == int:
        return "%25d" % x
    else:
        return "%25.5f" % x

def write_header():
    labels = ["Plugin", "#cores", "Overhead", 'Unit', "#tasks",
              "#samples",
              "max", "99.9th perc.", "99th perc.", "95th perc.",
              "avg", "med", "min", "std", "var", "file"]
    header = ", ".join(fmt_cell(x) for x in labels)
    print '#%s' % header[1:]


def stats_file(fname):
    name, ext = splitext(fname)
    conf = decode_key_value_filename(name)

    if 'overhead' in conf and conf['overhead'].rfind('-LATENCY') != -1:
        # latency is stored in nanoseconds, not cycles
        scale = 1 / 1000 # convert from nanoseconds
        unit = 'microseconds (scale = 1/1000)'
    elif options.cycles is None:
        scale = 1
        unit = 'cycles'
    else:
        # convert from cycles to usec
        scale = 1 / options.cycles
        unit = 'microseconds (scale = 1/%f)' % options.cycles

    stats = stats_for_file(fname, scale)
    if 'locks' in conf:
        sched = '%s_locks=%s' % (conf['scheduler'], conf['locks'])
    elif 'scheduler' in conf:
        sched = conf['scheduler']
    else:
        sched = 'UNKNOWN'

    ohead = conf['overhead'] if 'overhead' in conf else 'UNKNOWN'
    n = conf['n'] if 'n' in conf else '*'
    m = conf['m'] if 'm' in conf else '*'

    info = [sched, m, ohead, unit, n]
    finfo = [fname]
    print ", ".join([fmt_cell(x) for x in info + stats + finfo])
    sys.stdout.flush()

if __name__ == '__main__':
    # FIXME: would be nicer with argparse
    parser = optparse.OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    (options, files) = parser.parse_args()

    try:
        write_header()
        for f in files:
            try:
                stats_file(f)
            except IOError, msg:
                print >> sys.stderr, msg
    except KeyboardInterrupt:
        pass
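
A brief usage sketch for ft-compute-stats (the file names and the clock rate are made up): the script prints one CSV row per sample file, giving plugin, core count, overhead type, unit, task count, sample count, max, 99.9th/99th/95th percentiles, mean, median, min, standard deviation, variance, and the file name. Overheads whose name ends in -LATENCY are taken to be in nanoseconds and scaled to microseconds; everything else is reported in cycles unless -p/--cycles-per-usec is given.

    # Report statistics in raw cycles (no scaling):
    ft-compute-stats combined-*.sf32 > stats-cycles.csv

    # Convert cycles to microseconds; 2128.0 cycles/usec is a hypothetical clock rate:
    ft-compute-stats -p 2128.0 combined-*.sf32 > stats-usec.csv
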
diff --git a/ft-count-samples b/ft-count-samples
new file mode 100755
index 0000000..b08f5fd
--- /dev/null
+++ b/ft-count-samples
@@ -0,0 +1,50 @@
#!/bin/bash

EVENTS=""
for F in $*
do
    E=`echo $F | sed -e 's/.*overhead=\([^_.]*\).*/\1/'`
    EVENTS="$EVENTS $E"
done

declare -A MATCHES

for E in $EVENTS
do
    if [ -z "${MATCHES[$E]}" ]
    then
        MATCHES[$E]=`ls $* | egrep "_overhead=${E}[_.]"`
    fi
done

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SHUFFLE_TRUNCATE=`find_helper ft-shuffle-truncate`
[ -z "$SHUFFLE_TRUNCATE" ] && die "Can't find 'ft-shuffle-truncate' utility."

for E in ${!MATCHES[@]}
do
    FILES="${MATCHES[$E]}"
    if [ ! -z "$FILES" ]
    then
        COUNT=`$SHUFFLE_TRUNCATE --count --only-min $FILES`
        printf "%20s, %7d\n" "$E" "$COUNT"
    fi
done
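
To illustrate the output format (event names and counts below are made up), ft-count-samples prints one "EVENT, COUNT" line per overhead type, where COUNT is the minimum number of samples available across that event's files as reported by ft-shuffle-truncate --count --only-min. This is the format that ft-select-samples later parses with awk.

    ft-count-samples *_overhead=*.float32 > counts.csv
    # counts.csv might then contain lines such as:
    #                SCHED,  123456
    #                  CXS,  120042
    #      RELEASE-LATENCY,   98765
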
diff --git a/ft-extract-samples b/ft-extract-samples
new file mode 100755
index 0000000..fd8a89c
--- /dev/null
+++ b/ft-extract-samples
@@ -0,0 +1,75 @@
#!/bin/bash

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    REL_TO_PATH="$PATH_TO_SCRIPT/$2"
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    elif [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$REL_TO_PATH/$1" ]
    then
        echo "$REL_TO_PATH/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SPLITTER=`find_helper ft2csv ../feather-trace-tools`
[ -z "$SPLITTER" ] && die "Can't find 'ft2csv' utility."


BE_EVENTS="SEND_RESCHED SEND_XCALL QUANTUM_BOUNDARY"
declare -A XTRA_OPTS
for E in $BE_EVENTS
do
    XTRA_OPTS[$E]="-b"
done

# for future extension...
IRQ_EVENTS=""
for E in $IRQ_EVENTS
do
    XTRA_OPTS[$E]="-x ${XTRA_OPTS[$E]}"
done

OPTS="-r"
OLD_EXT=bin
EXT=float32 # NumPy float32 dtype format

function do_split() {
    printf "\n[$NUM/$TOTAL] Extracting samples from $1\n"
    PRESENT=`$SPLITTER -l "$1" | sed -e 's/_START//' -e 's/_END//' | sort | uniq`
    echo $1 '->' $PRESENT

    for E in $PRESENT; do
        EP=${E/_/-}
        WHERE=`basename "$1" | sed -e "s/[.]${OLD_EXT}//"`
        TARGET="${WHERE}_overhead=$EP.$EXT"
        echo $1 $E ">>" $TARGET
        $SPLITTER $OPTS ${XTRA_OPTS[$E]} $E "$1" >> $TARGET
    done
}

if [ ! -f "$1" ]; then
    echo "Usage: ft-extract-samples <FEATHER-TRACE-FILE.bin>+"
    exit 1
fi

TOTAL=$#
NUM=0


while [ "" != "$*" ]
do
    NUM=$((NUM + 1))
    do_split "$1"
    shift
done
diff --git a/ft-select-samples b/ft-select-samples
new file mode 100755
index 0000000..fb3c39c
--- /dev/null
+++ b/ft-select-samples
@@ -0,0 +1,62 @@
#!/bin/bash

set -e

COUNTS="$1"
shift

EVENTS=""
for F in $*
do
    E=`echo $F | sed -e 's/.*overhead=\([^_.]*\).*/\1/'`
    EVENTS="$EVENTS $E"
done

declare -A MATCHES

for E in $EVENTS
do
    if [ -z "${MATCHES[$E]}" ]
    then
        MATCHES[$E]=`ls $* | egrep "_overhead=${E}[_.]"`
    fi
done

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SHUFFLE_TRUNCATE=`find_helper ft-shuffle-truncate`
[ -z "$SHUFFLE_TRUNCATE" ] && die "Can't find 'ft-shuffle-truncate' utility."

EVENTS=`awk -F ',' '{print $1}' $COUNTS`

for E in ${!MATCHES[@]}
do
    FILES="${MATCHES[$E]}"
    CUTOFF=`awk -F ',' "/ $E,/ {print \\$2}" $COUNTS`
    if [ -z "$FILES" ]
    then
        echo "[EE] No files for $E."
    elif (( $CUTOFF == 0 ))
    then
        echo "[EE] Cutoff for $E is zero."
    else
        echo "$E -> $CUTOFF"
        $SHUFFLE_TRUNCATE -c $CUTOFF $FILES
    fi
done
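
Putting the previous two tools together (file names are hypothetical): ft-select-samples takes the counts file as its first argument and the per-event sample files after it. For each overhead type it looks up the cutoff in the counts file and runs ft-shuffle-truncate -c on the matching files, which writes shuffled, truncated .sf32 copies next to the inputs.

    ft-count-samples *_overhead=*.float32 > counts.csv
    ft-select-samples counts.csv *_overhead=*.float32
    # e.g. scheduler=GSN-EDF_n=10_overhead=SCHED.float32
    #   -> scheduler=GSN-EDF_n=10_overhead=SCHED.sf32 (at most the cutoff's worth of samples)
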
diff --git a/ft-shuffle-truncate b/ft-shuffle-truncate
new file mode 100755
index 0000000..44ace39
--- /dev/null
+++ b/ft-shuffle-truncate
@@ -0,0 +1,136 @@
#!/usr/bin/env python

import numpy
import os
import sys
import optparse

def load_binary_file(fname, dtype='float32', modify=False):
    size = os.stat(fname).st_size

    if size:
        data = numpy.memmap(fname, dtype=dtype,
                            mode='r+' if modify else 'c')
        return data
    else:
        return []

o = optparse.make_option

opts = [
    o('-c', '--cut-off', action='store', dest='cutoff', type='int',
      help='max number of samples to use'),

    o(None, '--count', action='store_true', dest='count',
      help='just report the number of samples in each file'),

    o(None, '--only-min', action='store_true', dest='only_min',
      help='When counting, report only the minimum number of samples.'),

    o(None, '--output-dir', action='store', dest='output_dir',
      help='directory where output files should be stored.')
    ]

defaults = {
    'cutoff'     : None,
    'count'      : False,
    'only_min'   : False,
    'output_dir' : None,
    }

options = None

def load_files(fnames):
    return [load_binary_file(f) for f in fnames]

def shuffle_truncate(arrays, fnames, target_length=None):
    # Determine how many samples we can use.
    if target_length:
        shortest = target_length
    else:
        shortest = min([len(a) for a in arrays])
    print "Selecting %d samples from each data file." % shortest

    # Make sure we'll select samples from all
    # parts of the data file.
    for a, n in zip(arrays, fnames):
        if len(a) > shortest:
            # Gotta be uniformly shuffled.
            print "Shuffling %s ..." % n
            numpy.random.shuffle(a)
        else:
            # not enough samples
            print "Not shuffling %s." % n

    # Now select the same number of samples from each file.
    truncated = [a[:shortest] for a in arrays]

    return truncated

def store_files(arrays, fnames):
    for a, fn in zip(arrays, fnames):
        print 'Storing %s.' % fn
        fd = open(fn, 'wb')
        a.tofile(fd)
        fd.close()

def target_file(fname, want_ext):
    f = os.path.basename(fname)
    if options.output_dir:
        d = options.output_dir
    else:
        d = os.path.dirname(fname)
    if not want_ext is None:
        name, ext = os.path.splitext(f)
        f = "%s.%s" % (name, want_ext)
    return os.path.join(d, f)

def shuffle_truncate_store(files, cutoff=None, ext='sf32'):
    data = load_files(files)
    trunc = shuffle_truncate(data, files, target_length=cutoff)
    names = [target_file(f, ext) for f in files]
    store_files(trunc, names)

def shuffle_truncate_store_individually(files, cutoff):
    fmt = "%%0%dd" % len(str(len(files)))
    for i, f in enumerate(files):
        print ("[" + fmt + "/%d] %s") % (i+1, len(files),
                                         os.path.basename(f))
        sys.stdout.flush()
        name = target_file(f, 'sf32')
        fs = os.stat(f)
        if os.path.exists(name):
            print "Skipping since %s exists." % name
        elif fs.st_size == 0:
            print "Skipping since trace is empty."
        else:
            shuffle_truncate_store([f], cutoff=cutoff)

def report_sample_counts(files):
    counts = []
    fmt = "%%0%dd" % len(str(len(files)))
    for i, f in enumerate(files):
        d = load_binary_file(f)
        counts.append(len(d))
        if not options.only_min:
            print ("[" + fmt + "/%d] %8d %s") % (i+1, len(files), len(d), f)
            sys.stdout.flush()
        del d
    if options.only_min:
        print min(counts)

if __name__ == '__main__':
    # FIXME: would be nicer with argparse
    parser = optparse.OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    (options, files) = parser.parse_args()

    if not files:
        print "Usage: ft-shuffle-truncate data1.float32 data2.float32 data3.float32 ..."
    else:
        if options.count:
            report_sample_counts(files)
        elif options.cutoff:
            shuffle_truncate_store_individually(files, options.cutoff)
        else:
            shuffle_truncate_store(files)
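
ft-shuffle-truncate can also be run directly; a short sketch with hypothetical file names and a made-up cutoff:

    # Count samples per file, or report only the minimum across all files:
    ft-shuffle-truncate --count data1.float32 data2.float32
    ft-shuffle-truncate --count --only-min data1.float32 data2.float32

    # Shuffle each file and keep at most 50000 samples, writing data1.sf32 and
    # data2.sf32 next to the inputs (or into --output-dir if given):
    ft-shuffle-truncate -c 50000 data1.float32 data2.float32

    # Without -c, truncate all files to the length of the shortest one:
    ft-shuffle-truncate data1.float32 data2.float32
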
diff --git a/ft-sort-traces b/ft-sort-traces
new file mode 100755
index 0000000..524737d
--- /dev/null
+++ b/ft-sort-traces
@@ -0,0 +1,48 @@
#!/bin/bash

set -e

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    REL_TO_PATH="$PATH_TO_SCRIPT/$2"
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    elif [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$REL_TO_PATH/$1" ]
    then
        echo "$REL_TO_PATH/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SORT=`find_helper ftsort ../feather-trace-tools`
[ -z "$SORT" ] && die "Can't find 'ftsort' utility."

function do_sort() {
    printf "[$NUM/$TOTAL] Sorting $1\n"
    $SORT $1 2>&1
}

if [ ! -f "$1" ]; then
    echo "Usage: ft-sort-traces <FEATHER-TRACE-FILE.bin>+"
    exit 1
fi

TOTAL=`echo $* | wc -w`

NUM=0

while [ "" != "$*" ]; do
    NUM=$((NUM + 1))
    do_sort $1
    shift
done