aboutsummaryrefslogblamecommitdiffstats
path: root/bin/uncache.c
blob: 517015598dc0d968326fc10ab552ab8da9f37208 (plain) (tree)
1
2
3
4
5
6
7
8
9
10









                      
                     








































































































































































































                                                                                         
                                                       





























































































                                                                                                                       
                                                                              









































































                                                           
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sched.h>
#include <assert.h>
#include <string.h>
#include <stdint.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <inttypes.h>

/* Test tool for validating Litmus's uncache device.            */
/* The tool can also collect basic cache vs. sysmem statistics. */
/* Compile with '-O2' for significantly greater margins         */
/* in performance between cache and sysmem:                     */
/* (Intel Xeon X5650)                                           */
/*    -g -> uncache is 30x slower                               */
/*    -O2 -> uncache is >100x slower                            */

/* System page size in bytes. Assigned in main() from
   sysconf(_SC_PAGE_SIZE) before any test runs. */
int PAGE_SIZE;
/* Number of pages mapped by the data access test. */
#define NR_PAGES 16

/* Device node that is opened and mmap()ed to obtain uncacheable pages. */
#define UNCACHE_DEV "/dev/litmus/uncache"

/* Pointer type used for every test buffer.
   volatile forces a read from memory (or cache) on every reference. Note
   that volatile does not keep data out of the cache! */
typedef volatile char* pbuf_t;

/* Write val to the first byte of every page in [addr, addr + size).
   addr must be page aligned.
   The complete sweep over the pages is repeated
   (size / PAGE_SIZE) * PAGE_SIZE times.
   Always returns 0.

   The function is 'static inline' rather than plain 'inline': in C99 and
   later a plain inline definition does not emit an out-of-line copy, so
   any call the compiler decides not to inline (e.g. in an unoptimized
   build) would be an undefined reference at link time. */
static inline int linear_write(pbuf_t addr, int size, char val)
{
	pbuf_t end = addr + size;
	pbuf_t step;
	int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
	int times = nr_pages * PAGE_SIZE;
	int i;

	for (i = 0; i < times; ++i) {
		/* touch one byte per page */
		for (step = addr; step < end; step += PAGE_SIZE)
			*step = val;
	}
	return 0;
}
/* Read the first byte of every page in [addr, addr + size) and compare
   it against val. addr must be page aligned.
   The complete sweep over the pages is repeated
   (size / PAGE_SIZE) * PAGE_SIZE times.
   Returns 0 if every byte read equals val, -1 on the first mismatch.

   The function is 'static inline' rather than plain 'inline': in C99 and
   later a plain inline definition does not emit an out-of-line copy, so
   any call the compiler decides not to inline (e.g. in an unoptimized
   build) would be an undefined reference at link time. */
static inline int linear_read(pbuf_t addr, int size, char val)
{
	pbuf_t end = addr + size;
	pbuf_t step;
	int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
	int times = nr_pages * PAGE_SIZE;
	int i;

	for (i = 0; i < times; ++i) {
		/* inspect one byte per page */
		for (step = addr; step < end; step += PAGE_SIZE) {
			if (*step != val)
				return -1;
		}
	}
	return 0;
}

/* NOTE: hammer_write(), hammer_read() and test() are all 'static inline'
   rather than plain 'inline'. In C99 and later a plain inline definition
   does not emit an out-of-line copy, so any call the compiler decides not
   to inline (e.g. in an unoptimized build) would be an undefined reference
   at link time. */

/* Store val to *data nr times. The pointee is volatile, so every
   iteration performs a real store. Always returns 0. */
static inline int hammer_write(pbuf_t data, char val, int nr)
{
	int i;
	for (i = 0; i < nr; ++i)
		*data = val;
	return 0;
}

/* Load *data nr times and compare each value read against val.
   Returns 0 if every load yielded val, -1 on the first mismatch. */
static inline int hammer_read(pbuf_t data, char val, int nr)
{
	int i;
	for (i = 0; i < nr; ++i) {
		if (*data != val)
			return -1;
	}
	return 0;
}

/* Run the access workload 'trials' times on the buffer at 'data'.
   Each trial writes 0x55 to the first byte of the buffer HAMMER_TIME
   times and then reads it back HAMMER_TIME times.
   'size' is the buffer length in bytes; it is only used by the page
   sweep that is currently compiled out below.
   Returns 0 on success; prints a message and returns -1 as soon as one
   of the steps fails. */
static inline int test(pbuf_t data, int size, int trials)
{
	const int HAMMER_TIME = 10000;  /* can't cache this! */
	const char VAL = 0x55;
	int t;

	for (t = 0; t < trials; ++t) {

#if 0
		if (linear_write(data, size, VAL) != 0) {
			printf("failed linear_write()\n");
			return -1;
		}
		if (linear_read(data, size, VAL) != 0) {
			printf("failed linear_read()\n");
			return -1;
		}
#endif

		/* hammer at the first byte in the array */
		if (hammer_write(data, VAL, HAMMER_TIME) != 0) {
			printf("failed hammer_write()\n");
			return -1;
		}
		if (hammer_read(data, VAL, HAMMER_TIME) != 0) {
			printf("failed hammer_read()\n");
			return -1;
		}
	}
	return 0;
}

/* NOTE: the three timespec helpers below are 'static inline' rather than
   plain 'inline'. In C99 and later a plain inline definition does not
   emit an out-of-line copy, so any call the compiler decides not to
   inline (e.g. in an unoptimized build) would be an undefined reference
   at link time. */

/* Store the time value (sec, nsec) in *ts in canonical form, i.e. with
   0 <= ts->tv_nsec < 1000000000. Whole seconds contained in nsec
   (positive or negative) are carried over into tv_sec.
   A value of exactly 1000000000 ns is carried as well; leaving it in
   tv_nsec would produce an invalid timespec. */
static inline void timespec_normalize(struct timespec* ts, time_t sec, int64_t nsec)
{
	const int64_t nsec_per_sec = 1000000000LL;

	/* C division truncates toward zero, so after these two lines
	   nsec lies in (-nsec_per_sec, nsec_per_sec). */
	sec += (time_t)(nsec / nsec_per_sec);
	nsec %= nsec_per_sec;

	/* borrow one second to make a negative remainder non-negative */
	if (nsec < 0) {
		nsec += nsec_per_sec;
		--sec;
	}

	ts->tv_sec = sec;
	ts->tv_nsec = (long)nsec;
}

/* Return lhs - rhs as a normalized timespec. */
static inline struct timespec timespec_sub(struct timespec lhs, struct timespec rhs)
{
	struct timespec delta;
	timespec_normalize(&delta, lhs.tv_sec - rhs.tv_sec,
			   (int64_t)lhs.tv_nsec - (int64_t)rhs.tv_nsec);
	return delta;
}

/* Return lhs + rhs as a normalized timespec. */
static inline struct timespec timespec_add(struct timespec lhs, struct timespec rhs)
{
	struct timespec sum;
	/* widen before adding so the sum cannot overflow a 32-bit long */
	timespec_normalize(&sum, lhs.tv_sec + rhs.tv_sec,
			   (int64_t)lhs.tv_nsec + (int64_t)rhs.tv_nsec);
	return sum;
}

/* Convert ts to whole microseconds; the sub-microsecond part of tv_nsec
   is discarded by the integer division.

   'static inline' rather than plain 'inline': in C99 and later a plain
   inline definition does not emit an out-of-line copy, so a call that is
   not inlined (e.g. in an unoptimized build) would fail to link. */
static inline int64_t timespec_to_us(struct timespec ts)
{
	int64_t us = (int64_t)ts.tv_sec * 1000000LL;

	us += ts.tv_nsec / 1000LL;
	return us;
}

/* hammers away at the first byte in each mmaped page and
   times how long it took. */
int do_data(int do_uncache, int64_t* time)
{
	int size;
	int prot = PROT_READ | PROT_WRITE;
	int flags = MAP_PRIVATE;

	pbuf_t data;

	struct sched_param fifo_params;

	struct timespec start, end;
	int64_t elapsed;
	int trials = 1000;

	printf("Running data access test.\n");

	mlockall(MCL_CURRENT | MCL_FUTURE);

	memset(&fifo_params, 0, sizeof(fifo_params));
	fifo_params.sched_priority = sched_get_priority_max(SCHED_FIFO);

	size = PAGE_SIZE*NR_PAGES;

	printf("Allocating %d %s pages.\n", NR_PAGES, (do_uncache) ?
					"uncacheable" : "cacheable");
	if (do_uncache) {
		int fd = open(UNCACHE_DEV, O_RDWR);
		data = mmap(NULL, size, prot, flags, fd, 0);
		close(fd);
	}
	else {
		/* Accessed data will probably fit in L1, so this will go VERY fast.
		   Code should also have little-to-no pipeline stalls. */
		flags |= MAP_ANONYMOUS;
		data = mmap(NULL, size, prot, flags, -1, 0);
	}
	if (data == MAP_FAILED) {
		printf("Failed to alloc data! "
			   "Are you running Litmus? "
			   "Is Litmus broken?\n");
		return -1;
	}
	else {
		printf("Data allocated at %p.\n", data);
	}

	printf("Beginning tests...\n");
	if (sched_setscheduler(getpid(), SCHED_FIFO, &fifo_params)) {
		printf("(Could not become SCHED_FIFO task.) Are you running as root?\n");
	}

	/* observations suggest that no warmup phase is needed. */
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
	if (test(data, size, trials) != 0) {
		printf("Test failed!\n");
		munmap((char*)data, size);
		return -1;
	}
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
	elapsed = timespec_to_us(timespec_sub(end, start));
	printf("%s Time: %"PRIi64"us\n", (do_uncache) ?
					"Uncache" : "Cache", elapsed);

	munmap((char*)data, size);

	if(time)
		*time = elapsed;

	return 0;
}

/* Compares the runtime of the data access test on cached vs. uncached
   pages.

   Runs do_data() once on cacheable and once on uncacheable pages, prints
   the uncached/cached time ratio and checks that it is at least 'thresh'.

   Returns 0 if both runs succeed and the ratio reaches the threshold.
   Returns the non-zero result of do_data() if a run fails, and -1 if the
   ratio is too small or cannot be computed. */
int do_data_compare(void)
{
	const double thresh = 1.3;
	int ret;
	double ratio;
	int64_t cache_time = 0, uncache_time = 0;

	printf("Timing cached pages...\n");
	ret = do_data(0, &cache_time);
	if (ret != 0)
		return ret;

	printf("Timing uncached pages...\n");
	ret = do_data(1, &uncache_time);
	if (ret != 0)
		return ret;

	/* A zero denominator would yield inf or NaN, and NaN would slip
	   through the '<' comparison below as if the check had passed. */
	if (cache_time <= 0) {
		printf("Cached run took no measurable time; "
				"cannot compute ratio.\n");
		return -1;
	}

	ratio = (double)uncache_time/(double)cache_time;
	printf("Uncached/Cached Ratio: %f\n", ratio);

	if (ratio < thresh) {
		printf("Ratio is unexpectedly small (< %f)! "
				" Uncache broken? Are you on kvm?\n", thresh);
		return -1;
	}

	return 0;
}

/* tries to max out uncache allocations.
   under normal conditions (non-mlock),
   pages should spill into swap. uncache
   pages are not locked in memory. */
int do_max_alloc(void)
{
	int fd;
	int good = 1;
	int count = 0;
	uint64_t mmap_size = PAGE_SIZE; /* start at one page per mmap */

	/* half of default limit on ubuntu. (see /proc/sys/vm/max_map_count) */
	int max_mmaps = 32765;
	volatile char** maps = calloc(max_mmaps, sizeof(pbuf_t));

	if (!maps) {
		printf("failed to alloc pointers for pages\n");
		return -1;
	}

	printf("Testing max amount of uncache data. System may get wonkie (OOM Killer)!\n");

	fd = open(UNCACHE_DEV, O_RDWR);
	do {
		int i;
		int nr_pages = mmap_size/PAGE_SIZE;
		printf("Testing mmaps of %d pages.\n", nr_pages);

		count = 0;
		for (i = 0; (i < max_mmaps) && good; ++i) {
			pbuf_t data = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE, fd, 0);

			if (data != MAP_FAILED) {
				maps[i] = data;
				++count;
			}
			else {
				perror(NULL);
				good = 0;
			}
		}
		for (i = 0; i < count; ++i) {
			if (maps[i])
				munmap((char*)(maps[i]), mmap_size);
		}
		memset(maps, 0, sizeof(maps[0])*max_mmaps);

		mmap_size *= 2; /* let's do it again with bigger allocations */
	}while(good);

	free(maps);
	close(fd);

	printf("Maxed out allocs with %d mmaps of %"PRIu64" pages in size.\n",
		count, mmap_size/PAGE_SIZE);

	return 0;
}

/* Selects which experiment main() runs. */
typedef enum {
	UNCACHE   = 0,	/* data access test on uncacheable pages */
	CACHE     = 1,	/* data access test on cacheable pages */
	COMPARE   = 2,	/* run both and compare the timings */
	MAX_ALLOC = 3	/* probe how much uncache memory can be mapped */
} test_t;

/* Accepted command-line flags (none takes an argument):
     -u  data access test on uncacheable pages (default)
     -c  data access test on cacheable pages
     -x  run both and compare the timings
     -a  probe how much uncache memory can be mapped */
#define OPTSTR "ucxa"

/* Entry point: determines the page size, picks the test selected on the
   command line (the last flag given wins) and runs it.
   Returns the result of the selected test: 0 on success, non-zero on
   failure. Exits with -1 on an unknown flag or if the page size cannot
   be determined. */
int main(int argc, char** argv)
{
	int ret;
	int opt;
	test_t selected = UNCACHE;
	long page_size = sysconf(_SC_PAGE_SIZE);

	/* every size computation in the tests depends on a sane page size */
	if (page_size <= 0) {
		printf("could not determine page size\n");
		exit(-1);
	}
	PAGE_SIZE = (int)page_size;

	while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
		switch (opt) {
			case 'c':
				selected = CACHE;
				break;
			case 'u':
				selected = UNCACHE;
				break;
			case 'x':
				selected = COMPARE;
				break;
			case 'a':
				selected = MAX_ALLOC;
				break;
			case '?':
			default:
				/* getopt() reports every flag outside OPTSTR
				   as '?'. It never returns ':' here because
				   OPTSTR has no leading ':' and no flag takes
				   an argument. */
				printf("bad argument\n");
				exit(-1);
		}
	}


	printf("Page Size: %d\n", PAGE_SIZE);

	switch (selected)
	{
	case CACHE:
		ret = do_data(0, NULL);
		break;
	case UNCACHE:
		ret = do_data(1, NULL);
		break;
	case COMPARE:
		ret = do_data_compare();
		break;
	case MAX_ALLOC:
		ret = do_max_alloc();
		break;
	default:
		printf("invalid test\n");
		ret = -1;
		break;
	}

	if (ret != 0) {
		printf("Test failed.\n");
	}

	return ret;
}