path: root/unit_trace/cedf_stat_printer.py
blob: 38c6e6c1311d92ca77756868a4bb148725ef8e58
###############################################################################
# Description
###############################################################################
# Compute and print C-EDF statistics (lengths of inversion/simultaneous
# error records, per cluster).
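#
# A minimal usage sketch. How the record stream is produced is an
# assumption here (a reader elsewhere in unit_trace), not something this
# module defines:
#
#     from unit_trace import trace_reader
#     stream = trace_reader.trace_reader(trace_files)
#     cedf_stat_printer(stream, cdf_unit=0.1, clusterd=True, cluster_id=0,
#                       stat_type='inversion', out_file='run1')
#
# cdf_unit is the CDF bucket width in milliseconds; timestamps in the
# stream are assumed to be in nanoseconds (see the NOTE below).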
import ConfigParser
import math
###############################################################################
# Public Functions
###############################################################################
def cedf_stat_printer(stream, cdf_unit, clusterd, cluster_id, stat_type, out_file):

    # State
    sum_stats = 0
    count = 0
    last_time = None
    first_time = None
    min_stat = None
    max_stat = -1
    dist_list = []     # histogram of lengths, bucketed by cdf_unit
    length_list = []   # all non-zero lengths, kept for the standard deviation
    # Iterate over records, updating the state above
    for record in stream:
        if record.record_type == "event":
            if first_time is None:
                first_time = record.when
            last_time = record.when

        if record.record_type != "error":
            continue

        if record.type_name == stat_type + '_end':
            if clusterd is True and int(record.job.cluster) != cluster_id:
                continue
            if record.error_type == "simultaneous":
                length = float(record.job.simultaneous_end - record.job.simultaneous_start)
            else:
                length = float(record.job.inversion_end - record.job.inversion_start)
            if length > 0:
                if length > max_stat:
                    max_stat = length
                if min_stat is None or length < min_stat:
                    min_stat = length
                insert_dist_list(cdf_unit, dist_list, length)
                length_list.append(length)
                count += 1
                sum_stats += length

    # We've seen all records; compute summary statistics.
    if count > 0:
        avg_stat = sum_stats / count
        # Standard deviation: sqrt of the mean squared deviation
        # from the average length
        cum_dev = 0
        for l in length_list:
            cum_dev += (l - avg_stat) * (l - avg_stat)
        std_stat = math.sqrt(cum_dev / count)
    else:
        avg_stat = 0
        min_stat = 0
        max_stat = 0
        std_stat = 0
    # Print out our information.
    # NOTE: Here, we assume nanoseconds as the time unit.
    # May have to be changed in the future.
    info = ConfigParser.RawConfigParser()

    print "Type: %s" % (stat_type)
    info.add_section(stat_type)
    if clusterd is True:
        print "Cluster: %d" % (cluster_id)
        info.set(stat_type, 'Cluster', '%d' % (cluster_id))

    if last_time is None or first_time is None:
        last_time = 0
        first_time = 0

    print "Analysis Time Period: %f ms" % (float(last_time - first_time) / 1000000)
    info.set(stat_type, 'Period', '%f' % (float(last_time - first_time) / 1000000))
    print "Number of %s: %d" % (stat_type, count)
    info.set(stat_type, 'Num', '%d' % (count))
    print "Min %s: %f ms" % (stat_type, float(min_stat) / 1000000)
    info.set(stat_type, 'Min', '%f' % (float(min_stat) / 1000000))
    print "Max %s: %f ms" % (stat_type, float(max_stat) / 1000000)
    info.set(stat_type, 'Max', '%f' % (float(max_stat) / 1000000))
    print "Avg %s: %f ms" % (stat_type, float(avg_stat) / 1000000)
    info.set(stat_type, 'Avg', '%f' % (float(avg_stat) / 1000000))
    print "Std %s: %f ms" % (stat_type, float(std_stat) / 1000000)
    info.set(stat_type, 'Std', '%f' % (float(std_stat) / 1000000))
 
    if clusterd is True:
        filename_csv = stat_type + ".csv.cluster_" + str(cluster_id) + "." + out_file
        filename_info = stat_type + ".info.cluster_" + str(cluster_id) + "." + out_file
    else:
        filename_csv = stat_type + ".csv." + out_file
        filename_info = stat_type + ".info." + out_file

    with open(filename_info, 'wb') as info_file:
        info.write(info_file)
    
    # Write the CDF as "time (ms), cumulative percent" rows
    with open(filename_csv, 'w') as f:
        f.write("0,0\n")
        if count > 0:
            u = 0
            cdf_list = _get_cdf_list(dist_list, count, cdf_unit)
            for p in cdf_list:
                f.write("%f,%f\n" % (u, p))
                u += cdf_unit
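
# The CSV written above, for example (hypothetical numbers): lengths of
# 0.05, 0.08, 0.15 and 0.25 ms with cdf_unit = 0.1 produce
#     0,0
#     0.000000,50.000000
#     0.100000,75.000000
#     0.200000,100.000000
# where the row at time u gives the percentage of lengths that fall
# below u + cdf_unit.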
# Insert a length (in ns) into the distribution list, bucketed by cdf_unit (ms)
def insert_dist_list(cdf_unit, dist_list, length):
    length_pos = int(length / (cdf_unit * 1000000))
    # Grow the list with empty buckets until length_pos is a valid index
    while len(dist_list) <= length_pos:
        dist_list.append(0)

    dist_list[length_pos] += 1
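
# For example (hypothetical numbers): with cdf_unit = 0.1 ms, a 250000 ns
# (0.25 ms) inversion gives length_pos = int(250000 / 100000) = 2, so it
# is counted in dist_list[2], the [0.2 ms, 0.3 ms) bucket.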
                       
def _get_cdf_list(dist_list, count, cdf_unit):
    # Convert the bucket counts into a cumulative distribution, in percent
    cdf_list = []
    c_count = 0
    for n in dist_list:
        c_count += n
        cdf_list.append(100 * c_count / float(count))
    return cdf_list
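
# For example (hypothetical numbers): _get_cdf_list([2, 1, 1], 4, 0.1)
# returns [50.0, 75.0, 100.0]: 2 of the 4 lengths fall in the first
# bucket, 3 of 4 within the first two, and all 4 within the first three.
# (cdf_unit is unused here; the caller maps list indices back to time
# offsets when writing the CSV.)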