/*
 * pm_common.c
 *
 * Read / write data samples on file in binary format.
 * Perform a first elaboration on the (possibly big) samples set.
 */
#include "pm_common.h"

#define BLOCK_MUL	500
#define SBLOCK_SIZE	1024

/* the number of hot reads that we can find is the same
 * as the number of iterations we performed in pm_task */
#define NUMHOTREADS	REFTOTAL

#define min(a,b)	((a)<(b)?(a):(b))
#define max(a,b)	((a)>(b)?(a):(b))

/*
 * Quick and dirty statistics to get a rough estimate of cache access times.
 * It does not tell the difference between "good" and "bad" overall
 * sampling points, so max values coming out from this are not accurate.
 */
#define WANT_STATISTICS
#ifdef WANT_STATISTICS
#include <math.h>	/* sqrtl() */
#define CFREQ	2128.207	/* CPU frequency in MHz */
#endif

#ifdef DEBUG
#define dprintf(arg...)	fprintf(stderr, arg)
#else
#define dprintf(arg...)
#endif

/*
 * Simple sequential write on disk.
 * (concurrent writes must be protected)
 *
 * A saved_data_entry is ~20 B, so 100000 data points are ~2 MB.
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num)
{
	int fd;
	int i, j;
	/* buffer some data in memory before writing */
	struct saved_data_entry to_save[SBLOCK_SIZE];

	fd = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0660);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	for (i = 0; i < num / SBLOCK_SIZE; i++) {
		memset(to_save, 0,
			sizeof(struct saved_data_entry) * SBLOCK_SIZE);
		for (j = 0; j < SBLOCK_SIZE; j++) {
			to_save[j].access_type = samples[j].access_type;
			to_save[j].access_time = samples[j].access_time;
			to_save[j].cpu = samples[j].cpu;
			to_save[j].preemption_length =
				samples[j].preemption_length;
		}
		/* advance to the next block of samples */
		samples = &samples[j];

		if (write(fd, to_save, sizeof(struct saved_data_entry) *
				SBLOCK_SIZE) == -1) {
			close(fd);
			perror("write");
			return -1;
		}
	}

	/* write the remaining (num % SBLOCK_SIZE) entries */
	memset(to_save, 0, sizeof(struct saved_data_entry) * SBLOCK_SIZE);
	for (j = 0; j < num % SBLOCK_SIZE; j++) {
		to_save[j].access_type = samples[j].access_type;
		to_save[j].access_time = samples[j].access_time;
		to_save[j].cpu = samples[j].cpu;
		to_save[j].preemption_length = samples[j].preemption_length;
	}
	if (write(fd, to_save, sizeof(struct saved_data_entry) * j) == -1) {
		close(fd);
		perror("write");
		return -1;
	}

	dprintf("Written %d entries\n", i * SBLOCK_SIZE + j);

	close(fd);
	return 0;
}
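/*
 * Example (illustrative only, compiled out): a minimal round trip through
 * the on-disk format, assuming the prototypes for serialize_data_entry()
 * and read_sdata_entry() are declared in pm_common.h. The file name, the
 * sample values, and the PM_COMMON_EXAMPLES guard are hypothetical;
 * read_sdata_entry() itself is defined below.
 */
#ifdef PM_COMMON_EXAMPLES
static int example_roundtrip(void)
{
	struct data_entry in[2];
	struct saved_data_entry *out = NULL;
	int n;

	/* one hot read and one post-preemption read, fabricated values */
	in[0].access_type = 'H';
	in[0].access_time = 2050ULL;
	in[0].cpu = 0;
	in[0].preemption_length = 0ULL;
	in[1].access_type = 'P';
	in[1].access_time = 54000ULL;
	in[1].cpu = 2;
	in[1].preemption_length = 1000000ULL;

	if (serialize_data_entry("/tmp/pm_example.bin", in, 2) == -1)
		return -1;

	/* read everything back; n is the number of entries in the file */
	n = read_sdata_entry("/tmp/pm_example.bin", &out);
	if (n < 0)
		return -1;
	free(out);
	return n;
}
#endif /* PM_COMMON_EXAMPLES */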
/*
 * Presumably, all data will be written on little-endian machines;
 * the binary format is assumed to be little endian.
 *
 * return -1 on error
 * return the number of samples on success
 */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples)
{
	int fd;
	int i, j;
	int num_samples, file_size;
	struct saved_data_entry block_read[BLOCK_MUL];
	int bytes_read;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* Compute file size */
	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	/* Return to start position */
	if (lseek(fd, 0, SEEK_SET) == -1) {
		close(fd);
		perror("lseek");
		return -1;
	}

	num_samples = file_size / sizeof(struct saved_data_entry);
	dprintf("N entries: %d\n", num_samples);

	/* Allocate memory for the saved_data_entry samples */
	*samples = (struct saved_data_entry *)
		malloc(num_samples * sizeof(struct saved_data_entry));
	if (*samples == NULL) {
		close(fd);
		perror("malloc");
		return -1;
	}

	/* Read the whole file, in blocks of
	 * BLOCK_MUL * sizeof(struct saved_data_entry) bytes */
	j = 0;
	do {
		bytes_read = read(fd, block_read,
				sizeof(struct saved_data_entry) * BLOCK_MUL);
		if (bytes_read == -1) {
			perror("read");
			close(fd);
			free(*samples);
			return -1;
		}
		for (i = 0; i < (bytes_read /
				sizeof(struct saved_data_entry)); i++, j++)
			(*samples)[j] = block_read[i];
	} while (bytes_read > 0);

	close(fd);

#ifdef VERBOSE_DEBUG
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			(*samples)[i].access_type,
			(*samples)[i].access_time,
			(*samples)[i].cpu,
			(*samples)[i].preemption_length);
#endif
	return num_samples;
}

#ifdef WANT_STATISTICS
/*
 * Print min, max, avg, and stddev for the vector, computing the running
 * mean and sum of squared differences online (Welford's algorithm).
 * samples is the size of the population; cpufreq is in MHz.
 */
void print_rough_stats(unsigned long long *vector, int samples,
			double cpufreq, int wss, int tss)
{
	unsigned long long min, max;
	long double mi, qi, num_diff;
	int i;

	/* manage first value */
	mi = vector[0];
	qi = 0;
	min = vector[0];
	max = vector[0];

	for (i = 1; i < (samples - 1); i++) {
		if (vector[i] < min)
			min = vector[i];
		if (vector[i] > max)
			max = vector[i];
		num_diff = (long double)(vector[i] - mi);
		mi += num_diff / ((long double)(i + 1));
		qi += ((i) * (num_diff * num_diff)) / ((long double)(i + 1));
	}
	/* unbiased stddev should be computed on (samples - 2) */
	/*
	fprintf(stderr, "CPUFREQ = %f\nValues in tick\n", cpufreq);
	fprintf(stderr, "max = %llu\nmin = %llu\nmean = %Lf\nstddev = %Lf\n",
			max, min, mi, sqrtl(qi / (samples - 2)));
	*/
	fprintf(stderr, "# wss, tss, max, min, avg, stddev\n");
	fprintf(stderr, "%d, %d, %.5f, %.5f, %.5Lf, %.5Lf\n", wss, tss,
			max / cpufreq, min / cpufreq, mi / cpufreq,
			sqrtl(qi / (samples - 2)) / cpufreq);
}
#endif
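/*
 * Example (illustrative only, compiled out): since cpufreq is in MHz,
 * dividing a tick count by it yields microseconds. The working-set size
 * (64) and total-set size (3072) below are fabricated values; they only
 * show the expected call and output format.
 */
#ifdef PM_COMMON_EXAMPLES
static void example_rough_stats(void)
{
	/* five fake hot-read costs, in ticks */
	unsigned long long ticks[5] = { 2100, 2200, 2150, 2300, 2250 };

	/* prints the "# wss, tss, max, min, avg, stddev" header and one
	 * CSV row, with all times converted to microseconds */
	print_rough_stats(ticks, 5, CFREQ, 64, 3072);
}
#endif /* PM_COMMON_EXAMPLES */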
/*
 * get_valid_ovd(): get valid overheads from the trace file
 *
 * input:
 * @filename: input trace file name
 *
 * output:
 * @full_costs: array of all overheads and preemption lengths associated
 *              with valid measures
 *
 * full_costs MUST be initialized before entering this function and MUST
 * be at least DATAPOINTS long
 *
 * @return: number of valid measures read (implicit "true" length of the
 *          output array); < 0 on error
 */
int get_valid_ovd(const char *filename, struct full_ovd_plen **full_costs,
			int wss, int tss)
{
	struct saved_data_entry *samples;
	/* total number of samples */
	int num_samples;
	/* number of valid samples */
	int scount = 0;
	int i;
	/* do we have a valid hot read? */
	int valid_hot_reads = 0;
	/* how many consecutive hot reads? */
	int total_hot_reads = 0;
	/* do we have a valid hot cost? */
	int valid_hot_cost = 0;
	/* are the hot reads valid so far? */
	int no_invalid_reads = 1;
	/* what is the last cpu seen so far? */
	unsigned int l_cpu = 0;
	unsigned long long hot_cost;
#ifdef WANT_STATISTICS
	unsigned long long *valid_c_samples;
	unsigned long long *valid_h_samples;
	unsigned long long *valid_p_samples;
	int c_count;
	int h_count;
	int p_count;
#endif

	if ((num_samples = read_sdata_entry(filename, &samples)) < 0) {
		fprintf(stderr, "Cannot read %s\n", filename);
		return -1;
	}

	/* alloc an upper bound of the number of valid samples we can have */
	*full_costs = (struct full_ovd_plen *)
		malloc(num_samples * sizeof(struct full_ovd_plen));
	if (*full_costs == NULL) {
		fprintf(stderr, "Cannot allocate overhead array\n");
		free(samples);
		return -1;
	}
	memset(*full_costs, 0, num_samples * sizeof(struct full_ovd_plen));

#ifdef WANT_STATISTICS
	valid_c_samples = (unsigned long long *)
		malloc(num_samples * sizeof(unsigned long long));
	if (valid_c_samples == NULL) {
		fprintf(stderr, "Cannot allocate overhead array\n");
		free(*full_costs);
		free(samples);
		return -1;
	}
	valid_h_samples = (unsigned long long *)
		malloc(num_samples * sizeof(unsigned long long));
	if (valid_h_samples == NULL) {
		fprintf(stderr, "Cannot allocate overhead array\n");
		free(valid_c_samples);
		free(*full_costs);
		free(samples);
		return -1;
	}
	valid_p_samples = (unsigned long long *)
		malloc(num_samples * sizeof(unsigned long long));
	if (valid_p_samples == NULL) {
		fprintf(stderr, "Cannot allocate overhead array\n");
		free(valid_h_samples);
		free(valid_c_samples);
		free(*full_costs);
		free(samples);
		return -1;
	}
	memset(valid_c_samples, 0, num_samples * sizeof(unsigned long long));
	memset(valid_h_samples, 0, num_samples * sizeof(unsigned long long));
	memset(valid_p_samples, 0, num_samples * sizeof(unsigned long long));
	c_count = 0;
	h_count = 0;
	p_count = 0;
#endif

#ifdef VERBOSE_DEBUG
	fprintf(stderr, "Start collected overhead\n");
	/* write this on stderr so we can redirect it to a different stream */
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
			samples[i].access_type, samples[i].access_time,
			samples[i].cpu, samples[i].preemption_length);
	fprintf(stderr, "End collected overhead\n");
#endif
	hot_cost = samples[0].access_time;

	/* get valid overhead reads */
	for (i = 0; i < num_samples; i++) {
		if (samples[i].access_type == 'H' ||
		    samples[i].access_type == 'h') {
			/* NUMHOTREADS consecutive 'H' hot reads should
			 * (hopefully) appear. Take the minimum of all
			 * valid reads up to when the first invalid 'h'
			 * read appears. */
			total_hot_reads++;
			if (no_invalid_reads &&
			    samples[i].access_type == 'H') {
				valid_hot_reads++;
				if (valid_hot_reads == 1)
					hot_cost = samples[i].access_time;
				else
					hot_cost = min(hot_cost,
						samples[i].access_time);
			} else {
				/* an invalid hot read was found */
				no_invalid_reads = 0;
			}

			if (total_hot_reads == NUMHOTREADS) {
				/* check if we have a valid hot read value */
				if (valid_hot_reads > 0)
					valid_hot_cost = 1;
				else
					valid_hot_cost = 0;
				/* reset flags */
				valid_hot_reads = 0;
				total_hot_reads = 0;
				no_invalid_reads = 1;
			}
			/* update last seen cpu */
			l_cpu = samples[i].cpu;
		} else if (samples[i].access_type == 'P' ||
			   samples[i].access_type == 'p') {
			/* this may be a preemption or a migration,
			 * but we do not care now: just report it
			 * if it happened after a valid hot read
			 * and the preemption measure is valid */
			if (valid_hot_cost && samples[i].access_type == 'P') {
				(*full_costs)[scount].curr_cpu =
					samples[i].cpu;
				(*full_costs)[scount].last_cpu = l_cpu;
				(*full_costs)[scount].ovd = (long long)
					samples[i].access_time - hot_cost;
				(*full_costs)[scount].plen = (long long)
					samples[i].preemption_length;
				dprintf("%u %u %lld %lld\n",
					(*full_costs)[scount].curr_cpu,
					(*full_costs)[scount].last_cpu,
					(*full_costs)[scount].ovd,
					(*full_costs)[scount].plen);
				scount++;
			}
			/* update last seen cpu */
			l_cpu = samples[i].cpu;
		}
#ifdef WANT_STATISTICS
		if (samples[i].access_type == 'C')
			valid_c_samples[c_count++] = samples[i].access_time;
		else if (samples[i].access_type == 'H')
			valid_h_samples[h_count++] = samples[i].access_time;
		else if (samples[i].access_type == 'P')
			valid_p_samples[p_count++] = samples[i].access_time;
#endif
	}
	dprintf("End of valid entries\n");

#ifdef WANT_STATISTICS
	fprintf(stderr, "# Cold cache\n");
	print_rough_stats(valid_c_samples, c_count, CFREQ, wss, tss);
	fprintf(stderr, "# Hot cache\n");
	print_rough_stats(valid_h_samples, h_count, CFREQ, wss, tss);
	fprintf(stderr, "# After preemption\n");
	print_rough_stats(valid_p_samples, p_count, CFREQ, wss, tss);
	fprintf(stderr, "## Nsamples(c,h,p): %d, %d, %d\n",
			c_count, h_count, p_count);
	free(valid_p_samples);
	free(valid_h_samples);
	free(valid_c_samples);
#endif
	free(samples);
	return scount;
}
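/*
 * Worked example (hypothetical trace, for illustration): suppose
 * NUMHOTREADS == 3 and the record stream is
 *
 *	H 2100   H 2050   H 2120   P 54000 (plen 1000000)
 *
 * The batch of hot reads is all valid, so hot_cost = min(2100, 2050, 2120)
 * = 2050 ticks, and the 'P' record yields ovd = 54000 - 2050 = 51950 ticks
 * with plen = 1000000. A lowercase 'h' anywhere in the batch invalidates
 * the reads from that point on; a lowercase 'p' drops that preemption
 * sample entirely.
 */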
/*
 * TODO: we are not using this function anymore, as the description of the
 * cpu topology for our systems (xeon) doesn't match the cpu number
 * assignment this function relies on to work correctly. This should be
 * fixed at some point (also because the i7 uses a different cpu
 * assignment). --- See below for the currently used function.
 *
 * get_ovd_plen(): get overheads and preemption/migration lengths for
 * different core configurations
 *
 * For most architectures we can have at most 3 cache levels on the same
 * chip, and then off-chip migrations. In the worst case we need to measure:
 * [1] same core preemption, [2] same L2 migration,
 * [3] same L3 (different L2, same chip) migration, [4] off-chip migration.
 *
 * Linux processes _physical_ CPUs in a "linear" fashion, assigning a
 * sequence number to one core on a physical cpu and then jumping to the
 * next physical cpu. Look in sysfs for more details on cpu topology.
 * This doesn't seem to apply to NUMA machines (e.g., Opteron 8212,
 * Pound -> Nehalem i7), so the following function probably works there,
 * but we need to check the topology first...
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 *               (and in the output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @cores_per_chip: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * If samel2 is NULL, then L3 is not present and samel2 is equivalent to
 * samechip. cores_per_l2 should be equal to cores_per_chip, but it is
 * not used.
 */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
			unsigned int cores_per_l2,
			unsigned int cores_per_chip,
			struct ovd_plen *preempt, int *pcount,
			struct ovd_plen *samel2, int *l2count,
			struct ovd_plen *samechip, int *chipcount,
			struct ovd_plen *offchip, int *offcount)
{
	int i;
	unsigned int curr_cpu;
	unsigned int last_cpu;

	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);
		curr_cpu = full_costs[i].curr_cpu;
		last_cpu = full_costs[i].last_cpu;

		if (curr_cpu == last_cpu) {
			dprintf("preempt\n");
			/* preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;
			continue;
		}

		if (samel2) {
			dprintf("l2\n");
			if ((curr_cpu / cores_per_l2) ==
			    (last_cpu / cores_per_l2)) {
				dprintf("same L2\n");
				/* same L2 migration */
				samel2[*l2count].ovd = full_costs[i].ovd;
				samel2[*l2count].plen = full_costs[i].plen;
				(*l2count)++;
				continue;
			}
			if (((curr_cpu / cores_per_l2) !=
			     (last_cpu / cores_per_l2)) &&
			    ((curr_cpu / cores_per_chip) ==
			     (last_cpu / cores_per_chip))) {
				dprintf("same L3\n");
				/* same L3 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;
				continue;
			}
		} else {
			dprintf("same chip\n");
			/* samel2 == NULL: check same chip migration */
			if ((curr_cpu / cores_per_chip) ==
			    (last_cpu / cores_per_chip)) {
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;
				continue;
			}
		}

		dprintf("offchip\n");
		/* if we get here it must have been an off-chip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}
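/*
 * Worked example (hypothetical topology, for illustration): under the
 * "linear" numbering above, with cores_per_l2 = 2 and cores_per_chip = 4,
 * cpus 0-3 sit on chip 0 and cpus 4-7 on chip 1. A migration from cpu 0
 * to cpu 1 is classified same-L2 (0/2 == 1/2), from cpu 0 to cpu 2
 * same-L3 / same-chip (0/2 != 2/2 but 0/4 == 2/4), and from cpu 0 to
 * cpu 5 off-chip (0/4 != 5/4).
 */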
/*
 * get_ovd_plen_umaxeon(): get overheads and preemption/migration lengths
 * for different core configurations on UMA xeon
 *
 * See the comments above. This should probably work on most xeons (at
 * least on jupiter and ludwig).
 *
 * input:
 * @full_costs: see get_valid_ovd()
 * @num_samples: number of meaningful samples in full_costs
 *               (and in the output arrays)
 * @cores_per_l2: how many cores share an L2 cache (read below)
 * @num_phys_cpu: guess :)
 *
 * output:
 * @preempt: [1]
 * @samel2: [2]
 * @samechip: [3]
 * @offchip: [4]
 *
 * FIXME: using samel2 == NULL to say that L3 is not there is tricky...
 * If samel2 is NULL, then L3 is not present and samel2 is equivalent to
 * samechip. cores_per_l2 should be equal to cores_per_chip, but it is
 * not used.
 */
void get_ovd_plen_umaxeon(struct full_ovd_plen *full_costs, int num_samples,
			unsigned int cores_per_l2,
			unsigned int num_phys_cpu,
			struct ovd_plen *preempt, int *pcount,
			struct ovd_plen *samel2, int *l2count,
			struct ovd_plen *samechip, int *chipcount,
			struct ovd_plen *offchip, int *offcount)
{
	int i;
	unsigned int curr_cpu;
	unsigned int last_cpu;

	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);
		curr_cpu = full_costs[i].curr_cpu;
		last_cpu = full_costs[i].last_cpu;

		if (curr_cpu == last_cpu) {
			dprintf("preempt\n");
			/* preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;
			continue;
		}

		if ((curr_cpu % num_phys_cpu) == (last_cpu % num_phys_cpu)) {
			/* both cpus are on the same chip;
			 * which caches do they share? */
			if (samel2) {
				/* we have both L3 and L2.
				 * We already know we are sharing L3 */
				if (((curr_cpu / num_phys_cpu) /
				     cores_per_l2) ==
				    ((last_cpu / num_phys_cpu) /
				     cores_per_l2)) {
					/* they also share L2 */
					dprintf("same L2\n");
					samel2[*l2count].ovd =
						full_costs[i].ovd;
					samel2[*l2count].plen =
						full_costs[i].plen;
					(*l2count)++;
					continue;
				} else {
					/* this is an L3 migration */
					dprintf("same L3\n");
					samechip[*chipcount].ovd =
						full_costs[i].ovd;
					samechip[*chipcount].plen =
						full_costs[i].plen;
					(*chipcount)++;
					continue;
				}
			} else {
				/* just L2 on this machine:
				 * this is an L2 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;
				continue;
			}
		}

		dprintf("offchip\n");
		/* if we get here it must have been an off-chip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}
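/*
 * Example (illustrative only, compiled out): classifying one migration
 * under the UMA xeon numbering, where cpu % num_phys_cpu identifies the
 * chip. The topology and values are hypothetical: a 2-chip machine
 * (num_phys_cpu = 2) with both L3 and L2 (samel2 non-NULL) and 2 cores
 * per L2 (cores_per_l2 = 2).
 */
#ifdef PM_COMMON_EXAMPLES
static void example_umaxeon(void)
{
	struct full_ovd_plen costs[1];
	struct ovd_plen preempt[1], samel2[1], samechip[1], offchip[1];
	int pcount, l2count, chipcount, offcount;

	/* migration from cpu 0 to cpu 2: 0 % 2 == 2 % 2, so same chip;
	 * (0 / 2) / 2 == (2 / 2) / 2, so they also share the L2 */
	costs[0].last_cpu = 0;
	costs[0].curr_cpu = 2;
	costs[0].ovd = 51950;
	costs[0].plen = 1000000;

	get_ovd_plen_umaxeon(costs, 1, 2, 2,
			preempt, &pcount, samel2, &l2count,
			samechip, &chipcount, offchip, &offcount);
	/* expected: l2count == 1, all other counters == 0 */
}
#endif /* PM_COMMON_EXAMPLES */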