author    Bjoern Brandenburg <bbb@mpi-sws.org>    2016-03-28 12:10:58 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>    2016-03-28 12:13:28 -0400
commit    1f39b952832779cf7626afb35f720f485f4787b4 (patch)
tree      0c065f7563ed04a17b07e08d0e4938c5f802ed11
parent    2f0330ee24d4fa827cfd56e16bd3ae34b703a9ed (diff)
Add tools for overhead-processing workflow
-rwxr-xr-x   ft-combine-samples     89
-rwxr-xr-x   ft-compute-stats      135
-rwxr-xr-x   ft-count-samples       50
-rwxr-xr-x   ft-extract-samples     75
-rwxr-xr-x   ft-select-samples      62
-rwxr-xr-x   ft-shuffle-truncate   136
-rwxr-xr-x   ft-sort-traces         48
7 files changed, 595 insertions(+), 0 deletions(-)
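
Taken together, the seven scripts form a pipeline from raw Feather-Trace binaries to summary statistics. The sketch below shows one plausible way to chain them; all file names (overheads_*.bin, counts.csv, stats.csv) are hypothetical, and the ftsort and ft2csv binaries from feather-trace-tools are assumed to be in the PATH or next to these scripts.

    ft-sort-traces overheads_*.bin                       # sort raw trace files with ftsort
    ft-extract-samples overheads_*.bin                   # ft2csv -r per event -> *_overhead=<EVENT>.float32
    ft-count-samples *_overhead=*.float32 > counts.csv   # minimum sample count per overhead type
    ft-select-samples counts.csv *_overhead=*.float32    # shuffle + truncate -> *.sf32
    ft-combine-samples --std *_overhead=*.sf32           # strip n=, cpu=, msg=, seq=, u= and merge -> combined-*
    ft-compute-stats -p 2128.0 combined-*.sf32 > stats.csv   # 2128.0 cycles/usec is a made-up example value
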
diff --git a/ft-combine-samples b/ft-combine-samples
new file mode 100755
index 0000000..a1f19ed
--- /dev/null
+++ b/ft-combine-samples
@@ -0,0 +1,89 @@
#!/bin/bash

STRIP_CMD=""

function add_strip()
{
    TAG=$1
    STRIP_CMD="$STRIP_CMD -e s/_${TAG}=[^_]*//"
}

while true
do
    case "$1" in
        -n | --task-count)
            shift
            add_strip n
            ;;

        -c | --cpu)
            shift
            add_strip cpu
            ;;

        -m | --msg)
            shift
            add_strip msg
            ;;

        -s | --seq)
            shift
            add_strip seq
            ;;

        -u | --util)
            shift
            add_strip u
            ;;

        -l | --locks)
            shift
            add_strip locks
            ;;

        -x | --custom)
            shift
            add_strip $1
            shift
            ;;

        --std)
            shift
            add_strip n
            add_strip cpu
            add_strip msg
            add_strip seq
            add_strip u
            ;;

        *)
            break
            ;;
    esac
done

if [ -z "$STRIP_CMD" ]
then
    echo "Error: no fields to strip specified."
    exit 1
fi

function do_append() {
    TARGET=`basename $1 | sed $STRIP_CMD`
    TARGET="combined-$TARGET"
    printf "\n[$NUM/$TOTAL] Combining $1 -> $TARGET\n"
    cat $1 >> $TARGET
}

TOTAL=$#
NUM=0

echo "File names will be mangled with: sed $STRIP_CMD"


while [ "" != "$*" ]
do
    NUM=$((NUM + 1))
    do_append "$1"
    shift
done
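
For illustration, here is how ft-combine-samples might be invoked; the sample file names are hypothetical. With --std, the n=, cpu=, msg=, seq=, and u= fields are stripped from each file name, and all files that collapse to the same mangled name are concatenated into one combined-* file.

    # Two hypothetical per-task-count sample files for the same scheduler and event:
    #   scheduler=GSN-EDF_n=10_u=0.5_overhead=SCHED.sf32
    #   scheduler=GSN-EDF_n=20_u=0.9_overhead=SCHED.sf32
    ft-combine-samples --std scheduler=GSN-EDF_*_overhead=SCHED.sf32
    # Both are appended to: combined-scheduler=GSN-EDF_overhead=SCHED.sf32

    # -x/--custom strips an arbitrary (non-leading) key, e.g. a hypothetical host= tag:
    ft-combine-samples --std -x host scheduler=*_host=*_overhead=*.sf32
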
diff --git a/ft-compute-stats b/ft-compute-stats
new file mode 100755
index 0000000..e07fd16
--- /dev/null
+++ b/ft-compute-stats
@@ -0,0 +1,135 @@
#!/usr/bin/env python

from __future__ import division

import numpy

import optparse
import sys
import os

from os.path import splitext


def decode_key_value_filename(name):
    "Map key=value_otherkey=other-value names to proper dictionary."
    params = {}
    parts = name.split('_')
    for p in parts:
        kv = p.split('=')
        k = kv[0]
        v = kv[1] if len(kv) > 1 else None
        params[k] = v
    return params


def stats_for_file(fname, scale):
    n = 0
    max = 0
    p95 = 0
    p99 = 0
    p999 = 0
    min = 0
    med = 0
    avg = 0
    std = 0
    var = 0

    size = os.stat(fname).st_size
    if size:
        samples = numpy.memmap(fname, dtype='float32', mode='c')

        n = len(samples)
        if n > 0:
            samples *= scale
            max = numpy.amax(samples)
            p95 = numpy.percentile(samples, 95.0)
            p99 = numpy.percentile(samples, 99.0)
            p999 = numpy.percentile(samples, 99.9)
            med = numpy.median(samples)
            avg = numpy.mean(samples)
            min = numpy.amin(samples)

            std = numpy.std(samples, ddof=1)
            var = numpy.var(samples)

    return [n, max, p999, p99, p95, avg, med, min, std, var]

o = optparse.make_option

opts = [
    o('-p', '--cycles-per-usec', action='store', dest='cycles', type='float',
      help='how many cycles per usec'),
    ]

defaults = {
    'cycles' : None,
    }

options = None

def fmt_cell(x):
    if type(x) == str:
        return "%25s" % x
    if type(x) == int:
        return "%25d" % x
    else:
        return "%25.5f" % x

def write_header():
    labels = ["Plugin", "#cores", "Overhead", 'Unit', "#tasks",
              "#samples",
              "max", "99.9th perc.", "99th perc.", "95th perc.",
              "avg", "med", "min", "std", "var", "file"]
    header = ", ".join(fmt_cell(x) for x in labels)
    print '#%s' % header[1:]


def stats_file(fname):
    name, ext = splitext(fname)
    conf = decode_key_value_filename(name)

    if 'overhead' in conf and conf['overhead'].rfind('-LATENCY') != -1:
        # latency is stored in nanoseconds, not cycles
        scale = 1 / 1000 # convert from nanoseconds
        unit = 'microseconds (scale = 1/1000)'
    elif options.cycles is None:
        scale = 1
        unit = 'cycles'
    else:
        # convert from cycles to usec
        scale = 1 / options.cycles
        unit = 'microseconds (scale = 1/%f)' % options.cycles

    stats = stats_for_file(fname, scale)
    if 'locks' in conf:
        sched = '%s_locks=%s' % (conf['scheduler'], conf['locks'])
    elif 'scheduler' in conf:
        sched = conf['scheduler']
    else:
        sched = 'UNKNOWN'

    ohead = conf['overhead'] if 'overhead' in conf else 'UNKNOWN'
    n = conf['n'] if 'n' in conf else '*'
    m = conf['m'] if 'm' in conf else '*'

    info = [sched, m, ohead, unit, n]
    finfo = [fname]
    print ", ".join([fmt_cell(x) for x in info + stats + finfo])
    sys.stdout.flush()

if __name__ == '__main__':
    # FIXME: would be nicer with argparse
    parser = optparse.OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    (options, files) = parser.parse_args()

    try:
        write_header()
        for f in files:
            try:
                stats_file(f)
            except IOError, msg:
                print >> sys.stderr, msg
    except KeyboardInterrupt:
        pass
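
A brief usage sketch for ft-compute-stats (the file names and the clock rate are made up): the script prints one CSV row per sample file, giving plugin, core count, overhead type, unit, task count, sample count, max, 99.9th/99th/95th percentiles, mean, median, min, standard deviation, variance, and the file name. Overheads whose name ends in -LATENCY are taken to be in nanoseconds and scaled to microseconds; everything else is reported in cycles unless -p/--cycles-per-usec is given.

    # Report statistics in raw cycles (no scaling):
    ft-compute-stats combined-*.sf32 > stats-cycles.csv

    # Convert cycles to microseconds; 2128.0 cycles/usec is a hypothetical clock rate:
    ft-compute-stats -p 2128.0 combined-*.sf32 > stats-usec.csv
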
diff --git a/ft-count-samples b/ft-count-samples
new file mode 100755
index 0000000..b08f5fd
--- /dev/null
+++ b/ft-count-samples
@@ -0,0 +1,50 @@
#!/bin/bash

EVENTS=""
for F in $*
do
    E=`echo $F | sed -e 's/.*overhead=\([^_.]*\).*/\1/'`
    EVENTS="$EVENTS $E"
done

declare -A MATCHES

for E in $EVENTS
do
    if [ -z "${MATCHES[$E]}" ]
    then
        MATCHES[$E]=`ls $* | egrep "_overhead=${E}[_.]"`
    fi
done

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SHUFFLE_TRUNCATE=`find_helper ft-shuffle-truncate`
[ -z "$SHUFFLE_TRUNCATE" ] && die "Can't find 'ft-shuffle-truncate' utility."

for E in ${!MATCHES[@]}
do
    FILES="${MATCHES[$E]}"
    if [ ! -z "$FILES" ]
    then
        COUNT=`$SHUFFLE_TRUNCATE --count --only-min $FILES`
        printf "%20s, %7d\n" "$E" "$COUNT"
    fi
done
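
To illustrate the output format (event names and counts below are made up), ft-count-samples prints one "EVENT, COUNT" line per overhead type, where COUNT is the minimum number of samples available across that event's files as reported by ft-shuffle-truncate --count --only-min. This is the format that ft-select-samples later parses with awk.

    ft-count-samples *_overhead=*.float32 > counts.csv
    # counts.csv might then contain lines such as:
    #                SCHED,  123456
    #                  CXS,  120042
    #      RELEASE-LATENCY,   98765
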
diff --git a/ft-extract-samples b/ft-extract-samples
new file mode 100755
index 0000000..fd8a89c
--- /dev/null
+++ b/ft-extract-samples
@@ -0,0 +1,75 @@
#!/bin/bash

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    REL_TO_PATH="$PATH_TO_SCRIPT/$2"
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    elif [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$REL_TO_PATH/$1" ]
    then
        echo "$REL_TO_PATH/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SPLITTER=`find_helper ft2csv ../feather-trace-tools`
[ -z "$SPLITTER" ] && die "Can't find 'ft2csv' utility."


BE_EVENTS="SEND_RESCHED SEND_XCALL QUANTUM_BOUNDARY"
declare -A XTRA_OPTS
for E in $BE_EVENTS
do
    XTRA_OPTS[$E]="-b"
done

# for future extension...
IRQ_EVENTS=""
for E in $IRQ_EVENTS
do
    XTRA_OPTS[$E]="-x ${XTRA_OPTS[$E]}"
done

OPTS="-r"
OLD_EXT=bin
EXT=float32 # NumPy float32 dtype format

function do_split() {
    printf "\n[$NUM/$TOTAL] Extracting samples from $1\n"
    PRESENT=`$SPLITTER -l "$1" | sed -e 's/_START//' -e 's/_END//' | sort | uniq`
    echo $1 '->' $PRESENT

    for E in $PRESENT; do
        EP=${E/_/-}
        WHERE=`basename "$1" | sed -e "s/[.]${OLD_EXT}//"`
        TARGET="${WHERE}_overhead=$EP.$EXT"
        echo $1 $E ">>" $TARGET
        $SPLITTER $OPTS ${XTRA_OPTS[$E]} $E "$1" >> $TARGET
    done
}

if [ ! -f "$1" ]; then
    echo "Usage: ft-extract-samples <FEATHER-TRACE-FILE.bin>+"
    exit 1
fi

TOTAL=$#
NUM=0


while [ "" != "$*" ]
do
    NUM=$((NUM + 1))
    do_split "$1"
    shift
done
diff --git a/ft-select-samples b/ft-select-samples
new file mode 100755
index 0000000..fb3c39c
--- /dev/null
+++ b/ft-select-samples
@@ -0,0 +1,62 @@
#!/bin/bash

set -e

COUNTS="$1"
shift

EVENTS=""
for F in $*
do
    E=`echo $F | sed -e 's/.*overhead=\([^_.]*\).*/\1/'`
    EVENTS="$EVENTS $E"
done

declare -A MATCHES

for E in $EVENTS
do
    if [ -z "${MATCHES[$E]}" ]
    then
        MATCHES[$E]=`ls $* | egrep "_overhead=${E}[_.]"`
    fi
done

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SHUFFLE_TRUNCATE=`find_helper ft-shuffle-truncate`
[ -z "$SHUFFLE_TRUNCATE" ] && die "Can't find 'ft-shuffle-truncate' utility."

EVENTS=`awk -F ',' '{print $1}' $COUNTS`

for E in ${!MATCHES[@]}
do
    FILES="${MATCHES[$E]}"
    CUTOFF=`awk -F ',' "/ $E,/ {print \\$2}" $COUNTS`
    if [ -z "$FILES" ]
    then
        echo "[EE] No files for $E."
    elif (( $CUTOFF == 0 ))
    then
        echo "[EE] Cutoff for $E is zero."
    else
        echo "$E -> $CUTOFF"
        $SHUFFLE_TRUNCATE -c $CUTOFF $FILES
    fi
done
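
Putting the previous two tools together (file names are hypothetical): ft-select-samples takes the counts file as its first argument and the per-event sample files after it. For each overhead type it looks up the cutoff in the counts file and runs ft-shuffle-truncate -c on the matching files, which writes shuffled, truncated .sf32 copies next to the inputs.

    ft-count-samples *_overhead=*.float32 > counts.csv
    ft-select-samples counts.csv *_overhead=*.float32
    # e.g. scheduler=GSN-EDF_n=10_overhead=SCHED.float32
    #   -> scheduler=GSN-EDF_n=10_overhead=SCHED.sf32 (at most the cutoff's worth of samples)
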
diff --git a/ft-shuffle-truncate b/ft-shuffle-truncate
new file mode 100755
index 0000000..44ace39
--- /dev/null
+++ b/ft-shuffle-truncate
@@ -0,0 +1,136 @@
#!/usr/bin/env python

import numpy
import os
import sys
import optparse

def load_binary_file(fname, dtype='float32', modify=False):
    size = os.stat(fname).st_size

    if size:
        data = numpy.memmap(fname, dtype=dtype,
                            mode='r+' if modify else 'c')
        return data
    else:
        return []

o = optparse.make_option

opts = [
    o('-c', '--cut-off', action='store', dest='cutoff', type='int',
      help='max number of samples to use'),

    o(None, '--count', action='store_true', dest='count',
      help='just report the number of samples in each file'),

    o(None, '--only-min', action='store_true', dest='only_min',
      help='When counting, report only the minimum number of samples.'),

    o(None, '--output-dir', action='store', dest='output_dir',
      help='directory where output files should be stored.')
    ]

defaults = {
    'cutoff'     : None,
    'count'      : False,
    'only_min'   : False,
    'output_dir' : None,
    }

options = None

def load_files(fnames):
    return [load_binary_file(f) for f in fnames]

def shuffle_truncate(arrays, fnames, target_length=None):
    # Determine how many samples we can use.
    if target_length:
        shortest = target_length
    else:
        shortest = min([len(a) for a in arrays])
    print "Selecting %d samples from each data file." % shortest

    # Make sure we'll select samples from all
    # parts of the data file.
    for a, n in zip(arrays, fnames):
        if len(a) > shortest:
            # Gotta be uniformly shuffled.
            print "Shuffling %s ..." % n
            numpy.random.shuffle(a)
        else:
            # not enough samples
            print "Not shuffling %s." % n

    # Now select the same number of samples from each file.
    truncated = [a[:shortest] for a in arrays]

    return truncated

def store_files(arrays, fnames):
    for a, fn in zip(arrays, fnames):
        print 'Storing %s.' % fn
        fd = open(fn, 'wb')
        a.tofile(fd)
        fd.close()

def target_file(fname, want_ext):
    f = os.path.basename(fname)
    if options.output_dir:
        d = options.output_dir
    else:
        d = os.path.dirname(fname)
    if not want_ext is None:
        name, ext = os.path.splitext(f)
        f = "%s.%s" % (name, want_ext)
    return os.path.join(d, f)

def shuffle_truncate_store(files, cutoff=None, ext='sf32'):
    data = load_files(files)
    trunc = shuffle_truncate(data, files, target_length=cutoff)
    names = [target_file(f, ext) for f in files]
    store_files(trunc, names)

def shuffle_truncate_store_individually(files, cutoff):
    fmt = "%%0%dd" % len(str(len(files)))
    for i, f in enumerate(files):
        print ("[" + fmt + "/%d] %s") % (i+1, len(files),
                                         os.path.basename(f))
        sys.stdout.flush()
        name = target_file(f, 'sf32')
        fs = os.stat(f)
        if os.path.exists(name):
            print "Skipping since %s exists." % name
        elif fs.st_size == 0:
            print "Skipping since trace is empty."
        else:
            shuffle_truncate_store([f], cutoff=cutoff)

def report_sample_counts(files):
    counts = []
    fmt = "%%0%dd" % len(str(len(files)))
    for i, f in enumerate(files):
        d = load_binary_file(f)
        counts.append(len(d))
        if not options.only_min:
            print ("[" + fmt + "/%d] %8d %s") % (i+1, len(files), len(d), f)
            sys.stdout.flush()
        del d
    if options.only_min:
        print min(counts)

if __name__ == '__main__':
    # FIXME: would be nicer with argparse
    parser = optparse.OptionParser(option_list=opts)
    parser.set_defaults(**defaults)
    (options, files) = parser.parse_args()

    if not files:
        print "Usage: ft-shuffle-truncate data1.float32 data2.float32 data3.float32 ..."
    else:
        if options.count:
            report_sample_counts(files)
        elif options.cutoff:
            shuffle_truncate_store_individually(files, options.cutoff)
        else:
            shuffle_truncate_store(files)
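
ft-shuffle-truncate can also be run directly; a short sketch with hypothetical file names and a made-up cutoff:

    # Count samples per file, or report only the minimum across all files:
    ft-shuffle-truncate --count data1.float32 data2.float32
    ft-shuffle-truncate --count --only-min data1.float32 data2.float32

    # Shuffle each file and keep at most 50000 samples, writing data1.sf32 and
    # data2.sf32 next to the inputs (or into --output-dir if given):
    ft-shuffle-truncate -c 50000 data1.float32 data2.float32

    # Without -c, truncate all files to the length of the shortest one:
    ft-shuffle-truncate data1.float32 data2.float32
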
diff --git a/ft-sort-traces b/ft-sort-traces
new file mode 100755
index 0000000..524737d
--- /dev/null
+++ b/ft-sort-traces
@@ -0,0 +1,48 @@
#!/bin/bash

set -e

PATH_TO_SCRIPT=`dirname $0`
function find_helper()
{
    IN_PATH=`which $1`
    REL_TO_PATH="$PATH_TO_SCRIPT/$2"
    if [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$PATH_TO_SCRIPT/$1" ]
    then
        echo "$PATH_TO_SCRIPT/$1"
    elif [ -z "$IN_PATH" ] && [ ! -z "$PATH_TO_SCRIPT" ] && [ -x "$REL_TO_PATH/$1" ]
    then
        echo "$REL_TO_PATH/$1"
    else
        echo "$IN_PATH"
    fi
}

function die()
{
    echo "Error: $*"
    exit 1
}

SORT=`find_helper ftsort ../feather-trace-tools`
[ -z "$SORT" ] && die "Can't find 'ftsort' utility."

function do_sort() {
    printf "[$NUM/$TOTAL] Sorting $1\n"
    $SORT $1 2>&1
}

if [ ! -f "$1" ]; then
    echo "Usage: ft-sort-traces <FEATHER-TRACE-FILE.bin>+"
    exit 1
fi

TOTAL=`echo $* | wc -w`

NUM=0

while [ "" != "$*" ]; do
    NUM=$((NUM + 1))
    do_sort $1
    shift
done