###############################################################################
# Description
###############################################################################
# Compute and print C-EDF statistics

import math
import ConfigParser

###############################################################################
# Public Functions
###############################################################################

def cedf_stat_printer(stream, cdf_unit, clustered, cluster_id, stat_type, out_file):
    # State
    sum_stats = 0
    count = 0
    last_time = None
    first_time = None
    min_stat = None
    max_stat = -1
    dist_list = []    # histogram: one bucket per cdf_unit ms
    length_list = []  # every non-zero length, kept for the std-dev pass

    # Iterate over records, updating state
    for record in stream:
        if record.record_type == "event":
            if first_time is None:
                first_time = record.when
            last_time = record.when
        if record.record_type != "error":
            continue
        if record.type_name == stat_type + '_end':
            if clustered is True and int(record.job.cluster) != cluster_id:
                continue
            if record.error_type == "simultaneous":
                length = float(record.job.simultaneous_end - record.job.simultaneous_start)
            else:
                length = float(record.job.inversion_end - record.job.inversion_start)
            if length > 0:
                if length > max_stat:
                    max_stat = length
                if min_stat is None or length < min_stat:
                    min_stat = length
                insert_dist_list(cdf_unit, dist_list, length)
                length_list.append(length)
                count += 1
                sum_stats += length

    # We've seen all records; compute the summary statistics.
    if count > 0:
        avg_stat = sum_stats / count
        # Population standard deviation (divides by count, not count - 1)
        cum_dev = 0
        for l in length_list:
            cum_dev += (l - avg_stat) * (l - avg_stat)
        std_stat = math.sqrt(cum_dev / count)
    else:
        avg_stat = 0
        min_stat = 0
        max_stat = 0
        std_stat = 0
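    # Worked example of the arithmetic above (made-up numbers, not from a real
    # trace): two inversions of 1e6 ns and 3e6 ns give sum_stats = 4e6 and
    # count = 2, so avg_stat = 2e6 ns; each squared deviation is 1e12, so
    # std_stat = sqrt(2e12 / 2) = 1e6 ns.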
    # Print out our information
    # NOTE: Here, we assume nanoseconds as the time unit.
    # May have to be changed in the future.
    info = ConfigParser.RawConfigParser()
    print "Type: %s" % (stat_type)
    info.add_section('%s' % (stat_type))
    if clustered is True:
        print "Cluster: %d" % (cluster_id)
        info.set('%s' % (stat_type), 'Cluster', '%d' % (cluster_id))
    if last_time is None or first_time is None:
        last_time = 0
        first_time = 0
    print "Analysis Time Period: %f ms" % (float(last_time - first_time) / 1000000)
    info.set('%s' % (stat_type), 'Period', '%f' % (float(last_time - first_time) / 1000000))
    print "Number of %s: %d" % (stat_type, count)
    info.set('%s' % (stat_type), 'Num', '%d' % (count))
    print "Min %s: %f ms" % (stat_type, float(min_stat) / 1000000)
    info.set('%s' % (stat_type), 'Min', '%f' % (float(min_stat) / 1000000))
    print "Max %s: %f ms" % (stat_type, float(max_stat) / 1000000)
    info.set('%s' % (stat_type), 'Max', '%f' % (float(max_stat) / 1000000))
    print "Avg %s: %f ms" % (stat_type, float(avg_stat) / 1000000)
    info.set('%s' % (stat_type), 'Avg', '%f' % (float(avg_stat) / 1000000))
    print "Std %s: %f ms" % (stat_type, float(std_stat) / 1000000)
    info.set('%s' % (stat_type), 'Std', '%f' % (float(std_stat) / 1000000))

    if clustered is True:
        filename_csv = stat_type + ".csv.cluster_" + str(cluster_id) + "." + out_file
        filename_info = stat_type + ".info.cluster_" + str(cluster_id) + "." + out_file
    else:
        filename_csv = stat_type + ".csv." + out_file
        filename_info = stat_type + ".info." + out_file

    with open(filename_info, 'wb') as info_file:
        info.write(info_file)

    with open(filename_csv, 'w') as f:
        #f.write("Time(ms),percent(%)\n")
        f.write("0,0\n")
        if count > 0:
            # The first bucket covers [0, cdf_unit) ms, so its cumulative
            # percentage applies at the bucket's upper edge.
            u = cdf_unit
            cdf_list = _get_cdf_list(dist_list, count, cdf_unit)
            for p in cdf_list:
                f.write("%f,%f\n" % (u, p))
                u += cdf_unit


# Insert a length (in ns) into the distribution list, bucketed by cdf_unit (ms).
def insert_dist_list(cdf_unit, dist_list, length):
    length_pos = int(length / (cdf_unit * 1000000))
    max_length = len(dist_list)
    # Grow the list until the target bucket exists.
    if length_pos + 1 > max_length:
        for _ in range(0, length_pos + 1 - max_length):
            dist_list.append(0)
    dist_list[length_pos] += 1


# Turn the per-bucket histogram into a cumulative distribution (in percent).
def _get_cdf_list(dist_list, count, cdf_unit):
    cdf_list = []
    c_count = 0
    for x in range(0, len(dist_list)):
        c_count += dist_list[x]
        cdf_list.append(100.0 * c_count / float(count))
    return cdf_list
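

###############################################################################
# Usage sketch
###############################################################################
# A minimal, hypothetical example of driving cedf_stat_printer(); it is not
# part of the original tool. The classes below are stand-ins that only mimic
# the attributes the function actually reads (record_type, when, type_name,
# error_type, and a .job with cluster and inversion_start/inversion_end
# timestamps in ns); a real trace parser would supply real record objects.

class _FakeJob(object):
    def __init__(self, cluster, start, end):
        self.cluster = cluster
        self.inversion_start = start
        self.inversion_end = end

class _FakeRecord(object):
    def __init__(self, record_type, when=0, type_name="", error_type="", job=None):
        self.record_type = record_type
        self.when = when
        self.type_name = type_name
        self.error_type = error_type
        self.job = job

if __name__ == '__main__':
    # Two bounding events plus two 'inversion_end' error records:
    # one 1 ms (1e6 ns) inversion and one 3 ms inversion.
    stream = [
        _FakeRecord("event", when=0),
        _FakeRecord("error", type_name="inversion_end", error_type="inversion",
                    job=_FakeJob(cluster=0, start=0, end=1000000)),
        _FakeRecord("error", type_name="inversion_end", error_type="inversion",
                    job=_FakeJob(cluster=0, start=0, end=3000000)),
        _FakeRecord("event", when=10000000),
    ]
    # 0.5 ms CDF buckets, no per-cluster filtering; this writes
    # inversion.csv.demo and inversion.info.demo to the working directory.
    cedf_stat_printer(stream, 0.5, False, None, "inversion", "demo")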