/*
 * pm_common.c
 *
 * Read / write data samples on file in binary format
 * Perform first elaboration on the (possibly big) samples set
 */
#include "pm_common.h"
#include <limits.h>

/* Upper bound on the number of saved_data_entry records requested by a
 * single read() call in read_sdata_entry() */
#define BLOCK_MUL 500
/* Number of saved_data_entry records buffered in memory before each
 * write() in serialize_data_entry() */
#define SBLOCK_SIZE 1024

/* Number of consecutive hot reads ('H' or 'h' samples) that make up one
 * group in get_valid_ovd() */
#define NUMHOTREADS 3
/* Smaller of two values. Each argument may be evaluated twice: do not pass
 * expressions with side effects */
#define min(a,b) ((a)<(b)?(a):(b))

/* Debug trace: printf-like output on stderr when DEBUG is defined, expands
 * to nothing otherwise. "arg..." is the GNU named-variadic macro syntax.
 * NOTE(review): the name is the same as POSIX dprintf(); the macro takes
 * precedence in this file -- confirm that is intended.
 */
#ifdef DEBUG
#define dprintf(arg...) fprintf(stderr,arg)
#else
#define dprintf(arg...)
#endif

/*
 * pm_write_all(): write exactly len bytes from buf to fd
 *
 * write() may transfer fewer bytes than requested; keep going until
 * everything is on its way to the file.
 *
 * @return:	0 on success, -1 if write() fails or makes no progress
 */
static int pm_write_all(int fd, const void *buf, size_t len)
{
	const char *pos = buf;
	ssize_t done;

	while (len > 0) {
		done = write(fd, pos, len);
		/* 0 bytes for a non-empty request would make us spin forever */
		if (done <= 0)
			return -1;
		pos += done;
		len -= (size_t) done;
	}

	return 0;
}

/*
 * serialize_data_entry(): append samples to a file in binary format
 *
 * Simple sequential write on disk (concurrent writes must be protected).
 * The file is created if it does not exist and data is always appended.
 * Samples are converted to struct saved_data_entry and written in blocks
 * of at most SBLOCK_SIZE records.
 *
 * saved_data_entry is ~ 20 B; so 100000 data points are ~ 2MB
 *
 * input:
 * @filename:	output file name
 * @samples:	array of samples to save
 * @num:	number of entries of samples to save; nothing is written
 *		if it is <= 0
 *
 * @return:	0 on success, -1 on error. On error, blocks written before
 *		the failure remain in the file.
 */
int serialize_data_entry(char *filename, struct data_entry *samples, int num)
{
	/* buffer some data in memory before writing */
	struct saved_data_entry to_save[SBLOCK_SIZE];
	int fd;
	int written = 0;
	int chunk;
	int j;

	fd = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0660);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	while (written < num) {
		chunk = num - written;
		if (chunk > SBLOCK_SIZE)
			chunk = SBLOCK_SIZE;

		/* zero first, so that padding bytes end up on disk as 0 */
		memset(to_save, 0, sizeof(to_save[0]) * (size_t) chunk);
		for (j = 0; j < chunk; j++) {
			to_save[j].access_type = samples[written + j].access_type;
			to_save[j].access_time = samples[written + j].access_time;
			to_save[j].cpu = samples[written + j].cpu;
			to_save[j].preemption_length =
				samples[written + j].preemption_length;
		}

		if (pm_write_all(fd, to_save,
				sizeof(to_save[0]) * (size_t) chunk) == -1) {
			/* report before close(), which may overwrite errno */
			perror("write");
			close(fd);
			return -1;
		}

		written += chunk;
	}

	dprintf("Written %d entries\n", written);

	/* a failing close() can be the only report of a deferred write error */
	if (close(fd) == -1) {
		perror("close");
		return -1;
	}

	return 0;
}

/*
 * read_sdata_entry(): load all the saved_data_entry records of a file
 *
 * The file is read as a plain sequence of struct saved_data_entry, with no
 * byte order conversion. Presumably, all data will be written on little
 * endian machines, so the binary format is assumed to be little endian.
 *
 * input:
 * @filename:	file to read
 *
 * output:
 * @samples:	on success, heap allocated array holding the records; the
 *		caller must free() it (also when 0 is returned).
 *		Set to NULL on error.
 *
 * @return:	number of records stored in *samples. Trailing bytes that do
 *		not form a complete record are ignored.
 *		-1 on error
 */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples)
{
	const size_t entry_size = sizeof(struct saved_data_entry);
	/* never ask read() for more than BLOCK_MUL records at once */
	const size_t max_chunk = entry_size * BLOCK_MUL;
	struct saved_data_entry *buf;
	off_t file_size;
	size_t wanted, filled, chunk;
	ssize_t got;
	int num_samples;
	int fd;
#ifdef DEBUG
	int i;
#endif

	*samples = NULL;

	fd = open(filename, O_RDONLY);
	if (fd == -1) {
		perror("open");
		return -1;
	}

	/* Compute file size, then return to start position */
	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == (off_t) -1 || lseek(fd, 0, SEEK_SET) == (off_t) -1) {
		/* report before close(), which may overwrite errno */
		perror("lseek");
		close(fd);
		return -1;
	}

	/* the record count is returned as an int: refuse what does not fit */
	if (file_size / (off_t) entry_size > (off_t) INT_MAX) {
		fprintf(stderr, "%s: too many samples\n", filename);
		close(fd);
		return -1;
	}
	num_samples = (int) (file_size / (off_t) entry_size);
	dprintf("N entries: %d\n", num_samples);

	/* Allocate memory for the samples. Always ask for at least one
	 * (zeroed) record so that an empty file is not mistaken for an
	 * allocation failure; calloc() also rejects a size overflow.
	 */
	buf = calloc(num_samples > 0 ? (size_t) num_samples : 1, entry_size);
	if (buf == NULL) {
		perror("malloc");
		close(fd);
		return -1;
	}

	/* Read straight into the output array. Reading is limited to the
	 * size measured above, so a file that grows meanwhile cannot
	 * overflow the buffer, and a short read() cannot misalign records.
	 */
	wanted = (size_t) num_samples * entry_size;
	filled = 0;
	while (filled < wanted) {
		chunk = wanted - filled;
		if (chunk > max_chunk)
			chunk = max_chunk;

		got = read(fd, (char *) buf + filled, chunk);
		if (got == -1) {
			perror("read");
			close(fd);
			free(buf);
			return -1;
		}
		if (got == 0)
			break;	/* file is shorter than measured */

		filled += (size_t) got;
	}

	close(fd);

	/* only report the records that were completely read */
	num_samples = (int) (filled / entry_size);
	*samples = buf;

#ifdef DEBUG
	for (i = 0; i < num_samples; i++)
		fprintf(stderr,"(%c) - ACC %llu, CPU %u, PLEN %llu\n",
				buf[i].access_type,
				buf[i].access_time, buf[i].cpu,
				buf[i].preemption_length);
#endif
	return num_samples;
}

/*
 * get_valid_ovd(): get valid overheads from trace file
 *
 * The trace is scanned in order. Hot reads ('H' valid, 'h' invalid) come
 * in groups of NUMHOTREADS: the hot cost of a group is the minimum access
 * time of the valid reads that precede the first invalid one. Every valid
 * 'P' sample seen while a valid hot cost is available produces one output
 * entry whose overhead is its access time minus that hot cost.
 * Samples with any other access type are ignored.
 *
 * input:
 * @filename:	input trace file name
 *
 * output:
 * @full_costs: array of all overheads and preemption length associated
 * 		with valid measures
 *
 * full_costs MUST be initialized before entering this function and MUST
 * be at least DATAPOINTS long. At most DATAPOINTS entries are stored: if
 * the trace holds more valid measures, the exceeding ones are dropped
 * and a warning is printed on stderr.
 *
 * @return:	number of valid measures read (implicit "true" length of
 *		output array.)
 *		If error return < 0
 */
int get_valid_ovd(const char *filename, struct full_ovd_plen *full_costs)
{
	struct saved_data_entry *samples;
	/* total number of samples */
	int num_samples;
	/* number of valid samples */
	int scount = 0;

	int i;

	/* how many valid hot reads in the current group? */
	int valid_hot_reads = 0;
	/* how many consecutive hot reads? */
	int total_hot_reads = 0;
	/* do we have a valid hot cost? */
	int valid_hot_cost = 0;
	/* are the hot reads valid so far? */
	int no_invalid_reads = 1;
	/* what is the last cpu seen so far? */
	unsigned int l_cpu = 0;

	/* Always assigned from the first valid hot read of a group before
	 * being used, so it does not need a sample to start from (there
	 * may be none at all).
	 */
	unsigned long long hot_cost = 0;

	/* if output array isn't long enough, early segfault */
	memset(full_costs, 0, DATAPOINTS * sizeof(struct full_ovd_plen));

	if ((num_samples = read_sdata_entry(filename, &samples)) < 0) {
		printf("Cannot read %s\n", filename);
		return -1;
	}

#ifdef DEBUG
	fprintf(stderr, "Start Valid overhead\n");
	/* write this on stderr so we can redirect it on a different stream */
	for (i = 0; i < num_samples; i++)
		fprintf(stderr, "(%c) - ACC %llu, CPU %u, PLEN %llu\n",
				samples[i].access_type,
				samples[i].access_time, samples[i].cpu,
				samples[i].preemption_length);
	fprintf(stderr, "End Valid ovrhead\n");
#endif

	/* get valid overheads reads */
	for (i = 0; i < num_samples; i++) {

		if (samples[i].access_type == 'H' ||
			samples[i].access_type == 'h') {
			/* NUMHOTREADS consecutive 'H' hot reads should
			 * (hopefully) appear. Take the minimum
			 * of all valid reads up to when the first
			 * invalid 'h' read appears.
			 */
			total_hot_reads++;
			if (no_invalid_reads && samples[i].access_type == 'H') {

				valid_hot_reads++;
				if (valid_hot_reads == 1) {
					hot_cost = samples[i].access_time;
					fprintf(stderr, "h1 = %llu\n", hot_cost);
				} else {
					hot_cost = min(hot_cost, samples[i].access_time);
					fprintf(stderr, "hm = %llu\n", hot_cost);
				}

			} else {
				/* invalid read: ignore the rest of the group */
				no_invalid_reads = 0;
			}

			if (total_hot_reads == NUMHOTREADS) {
				/* group complete: check if we have a valid
				 * hotread value */
				valid_hot_cost = (valid_hot_reads > 0);

				/* reset flags */
				valid_hot_reads = 0;
				total_hot_reads = 0;
				no_invalid_reads = 1;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;

		} else if (samples[i].access_type == 'P' ||
				samples[i].access_type == 'p') {

			/* this may be a preemption or a migration
			 * but we do not care now: just report it
			 * if it happened after a valid hot read
			 * and the preemption measure is valid
			 */
			if (valid_hot_cost && samples[i].access_type == 'P') {

				/* never write past the DATAPOINTS entries the
				 * caller is required to provide */
				if (scount >= DATAPOINTS) {
					fprintf(stderr, "Too many valid samples in %s, "
							"keeping the first %d\n",
							filename, scount);
					break;
				}

				full_costs[scount].curr_cpu = samples[i].cpu;
				full_costs[scount].last_cpu = l_cpu;
				/* may be negative if this access was faster
				 * than the hot one */
				full_costs[scount].ovd = (long long)
					samples[i].access_time - hot_cost;

				fprintf(stderr, "hs = %llu\n", hot_cost);
				fprintf(stderr, "s1 = %llu\n", samples[i].access_time);
				fprintf(stderr, "o1 = %lld\n", full_costs[scount].ovd);

				full_costs[scount].plen = (long long)
					samples[i].preemption_length;

				dprintf("%u %u %lld %lld\n", full_costs[scount].curr_cpu,
						full_costs[scount].last_cpu,
						full_costs[scount].ovd, full_costs[scount].plen);

				scount++;
			}

			/* update last seen cpu */
			l_cpu = samples[i].cpu;
		}
	}

	dprintf("End of valid entries\n");

	free(samples);
	return scount;
}

/*
 * get_ovd_plen(): 	get overheads and preemption/migration length for
 * 			different cores configurations
 *
 * For most architecture we can have at most 3 cache levels on the same chip
 * and then off chip migrations. In the worst case we need to measure:
 * [1] same core preemption, [2] same L2 migration,
 * [3] same L3 (different L2, same chip) migration, [4] off chip migration.
 *
 * input:
 * @full_costs:		see get_valid_ovd()
 * @num_samples:	number of meaningful samples in full_costs
 *			(and in output arrays)
 * @cores_per_l2:	how many cores share an l2 cache (read below)
 * @cores_per_chip:	guess :)
 *
 * output:
 * @preempt:		[1]
 * @samel2:		[2]
 * @samechip:		[3]
 * @offchip:		[4]
 *
 * if samel2 is NULL, then L3 is not present and samel2 is equivalent to
 * samechip. cores_per_l2 should be equal to cores_per_chip, but is not used.
 */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	int i;
	*pcount = 0;
	*l2count = 0;
	*chipcount = 0;
	*offcount = 0;

	for (i = 0; i < num_samples; i++) {
		dprintf("i = %d\n", i);

		if (full_costs[i].curr_cpu == full_costs[i].last_cpu) {
			dprintf("preempt\n");
			/* preemption */
			preempt[*pcount].ovd = full_costs[i].ovd;
			preempt[*pcount].plen = full_costs[i].plen;
			(*pcount)++;

			continue;

		}

		if (samel2) {
			dprintf("l2\n");

			if ((full_costs[i].curr_cpu / cores_per_l2) == (full_costs[i].last_cpu / cores_per_l2)) {
				dprintf("same L2\n");
				/* same L2 migration */
				samel2[*l2count].ovd = full_costs[i].ovd;
				samel2[*l2count].plen = full_costs[i].plen;
				(*l2count)++;

				continue;
			}

			if (((full_costs[i].curr_cpu / cores_per_l2) != (full_costs[i].last_cpu / cores_per_l2)) &&
					((full_costs[i].curr_cpu / cores_per_chip) == (full_costs[i].last_cpu / cores_per_chip))) {
				dprintf("same L3\n");
				/* same L3 migration */
				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		} else {
			dprintf("same chip\n");
			/* samel2 == NULL */
			/* check same chip migration */
			if ((full_costs[i].curr_cpu / cores_per_chip) == (full_costs[i].last_cpu / cores_per_chip)) {

				samechip[*chipcount].ovd = full_costs[i].ovd;
				samechip[*chipcount].plen = full_costs[i].plen;
				(*chipcount)++;

				continue;
			}
		}
		dprintf("offchip\n");
		/* if we are here it should have been a offchip migration */
		offchip[*offcount].ovd = full_costs[i].ovd;
		offchip[*offcount].plen = full_costs[i].plen;
		(*offcount)++;
	}
	dprintf("pcount = %d\n", *pcount);
	dprintf("chipcount = %d\n", *chipcount);
	dprintf("l2count = %d\n", *l2count);
	dprintf("offcount = %d\n", *offcount);
}