From 072b486393ab702eea9ea0a7fef569dbf8be0a32 Mon Sep 17 00:00:00 2001
From: Glenn Elliott <gelliott@cs.unc.edu>
Date: Fri, 8 Mar 2013 01:25:35 -0500
Subject: uncachedev: uncache test tool.

This adds the uncache test tool (bin/uncache.c). The tool
can be used to test Litmus's char device driver for allocating
uncacheable CPU memory. The tool runs various checks and
gathers basic cache vs. main memory statistics.

In the future, uncache could be extended to quantify the
benefits of the L1, L2, and L3 caches, instead of treating
them as a black box.

Note: Uncache works best when compiled with '-O2'.  While '-O2'
has not been added to the Makefile, other code was updated (code
in tests/ and rtspin), to compile with -O2.

DEPENDS UPON LITMUS-RT PATCH 888d097deb6d1fdc0c89a4f9667fd81cf416cfc7.
---
 Makefile      |   5 +-
 bin/rtspin.c  |   6 +-
 bin/uncache.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/fdso.c  |   4 +-
 tests/locks.c |   8 +-
 tests/pcp.c   |   8 +-
 6 files changed, 398 insertions(+), 14 deletions(-)
 create mode 100644 bin/uncache.c

diff --git a/Makefile b/Makefile
index 8195752..5432edd 100644
--- a/Makefile
+++ b/Makefile
@@ -71,7 +71,7 @@ AR  := ${CROSS_COMPILE}${AR}
 
 all     = lib ${rt-apps}
 rt-apps = cycles base_task rt_launch rtspin release_ts measure_syscall \
-	  base_mt_task runtests
+	  base_mt_task uncache runtests
 
 .PHONY: all lib clean dump-config TAGS tags cscope help
 
@@ -215,6 +215,9 @@ obj-rt_launch = rt_launch.o common.o
 obj-rtspin = rtspin.o common.o
 lib-rtspin = -lrt
 
+obj-uncache = uncache.o
+lib-uncache = -lrt
+
 obj-release_ts = release_ts.o
 
 obj-measure_syscall = null_call.o
diff --git a/bin/rtspin.c b/bin/rtspin.c
index 5054d0b..657a94c 100644
--- a/bin/rtspin.c
+++ b/bin/rtspin.c
@@ -80,7 +80,7 @@ static void get_exec_times(const char *file, const int column,
 
 		for (cur_col = 1; cur_col < column; ++cur_col) {
 			/* discard input until we get to the column we want */
-			fscanf(fstream, "%*s,");
+			int unused __attribute__ ((unused)) = fscanf(fstream, "%*s,");
 		}
 
 		/* get the desired exec. time */
@@ -200,11 +200,11 @@ int main(int argc, char** argv)
 	int column = 1;
 	const char *file = NULL;
 	int want_enforcement = 0;
-	double duration = 0, start;
+	double duration = 0, start = 0;
 	double *exec_times = NULL;
 	double scale = 1.0;
 	task_class_t class = RT_CLASS_HARD;
-	int cur_job, num_jobs;
+	int cur_job = 0, num_jobs = 0;
 
 	/* locking */
 	int lock_od = -1;
diff --git a/bin/uncache.c b/bin/uncache.c
new file mode 100644
index 0000000..b6f6913
--- /dev/null
+++ b/bin/uncache.c
@@ -0,0 +1,381 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sched.h>
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+
+/* Test tool for validating Litmus's uncache device.     */
+/* Tool also capable basic cache vs. sysmem statistics.  */
+/* Compile with '-O2' for significaintly greater margins */
+/* in performance between cache and sysmem:              */
+/* (Intel Xeon X5650)                                    */
+/*    -g -> uncache is 30x slower                        */
+/*    -O2 -> uncache is >100x slower                     */
+
+int PAGE_SIZE;
+#define NR_PAGES 16
+
+#define UNCACHE_DEV "/dev/litmus/uncache"
+
+/* volatile forces a read from memory (or cache) on every reference. Note
+   that volatile does not keep data out of the cache! */
+typedef volatile char* pbuf_t;
+
+/* hit the first byte in each page.
+   addr must be page aligned. */
+inline int linear_write(pbuf_t addr, int size, char val)
+{
+	pbuf_t end = addr + size;
+	pbuf_t step;
+	int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
+	int times = nr_pages * PAGE_SIZE;
+	int i;
+
+	for (i = 0; i < times; ++i)
+		for(step = addr; step < end; step += PAGE_SIZE)
+			*step = val;
+	return 0;
+}
+inline int linear_read(pbuf_t addr, int size, char val)
+{
+	pbuf_t end = addr + size;
+	pbuf_t step;
+	int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE;
+	int times = nr_pages * PAGE_SIZE;
+	int i;
+
+	for (i = 0; i < times; ++i)
+		for(step = addr; step < end; step += PAGE_SIZE) {
+			if (*step != val)
+				return -1;
+		}
+	return 0;
+}
+
+/* write to *data nr times. */
+inline int hammer_write(pbuf_t data, char val, int nr)
+{
+	int i;
+	for (i = 0; i < nr; ++i)
+		*data = val;
+	return 0;
+}
+
+/* read from *data nr times. */
+inline int hammer_read(pbuf_t data, char val, int nr)
+{
+	int i;
+	for (i = 0; i < nr; ++i) {
+		if (*data != val)
+			return -1;
+	}
+	return 0;
+}
+
+inline int test(pbuf_t data, int size, int trials)
+{
+	int HAMMER_TIME = 10000;  /* can't cache this! */
+	char VAL = 0x55;
+	int t;
+	for(t = 0; t < trials; ++t) {
+
+#if 0
+		if (linear_write(data, size, VAL) != 0) {
+			printf("failed linear_write()\n");
+			return -1;
+		}
+		if (linear_read(data, size, VAL) != 0) {
+			printf("failed linear_read()\n");
+			return -1;
+		}
+#endif
+
+		/* hammer at the first byte in the array */
+		if (hammer_write(data, VAL, HAMMER_TIME) != 0) {
+			printf("failed hammer_write()\n");
+			return -1;
+		}
+		if (hammer_read(data, VAL, HAMMER_TIME) != 0) {
+			printf("failed hammer_read()\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+inline void timespec_normalize(struct timespec* ts, time_t sec, int64_t nsec)
+{
+	while(nsec > 1000000000LL) {
+		asm("" : "+rm"(nsec));
+		nsec -= 1000000000LL;
+		++sec;
+	}
+	while(nsec < 0) {
+		asm("" : "+rm"(nsec));
+		nsec += 1000000000LL;
+		--sec;
+	}
+
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+inline struct timespec timespec_sub(struct timespec lhs, struct timespec rhs)
+{
+	struct timespec delta;
+	timespec_normalize(&delta, lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec);
+	return delta;
+}
+
+inline struct timespec timespec_add(struct timespec lhs, struct timespec rhs)
+{
+	struct timespec delta;
+	timespec_normalize(&delta, lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec);
+	return delta;
+}
+
+inline int64_t timespec_to_us(struct timespec ts)
+{
+	int64_t t;
+	t = ts.tv_sec * 1000000LL;
+	t += ts.tv_nsec / 1000LL;
+	return t;
+}
+
+/* hammers away at the first byte in each mmaped page and
+   times how long it took. */
+int do_data(int do_uncache, int64_t* time)
+{
+	int size;
+	int prot = PROT_READ | PROT_WRITE;
+	int flags = MAP_PRIVATE;
+
+	pbuf_t data;
+
+	struct sched_param fifo_params;
+
+	struct timespec start, end;
+	int64_t elapsed;
+	int trials = 1000;
+
+	printf("Running data access test.\n");
+
+	mlockall(MCL_CURRENT | MCL_FUTURE);
+
+	memset(&fifo_params, 0, sizeof(fifo_params));
+	fifo_params.sched_priority = sched_get_priority_max(SCHED_FIFO);
+
+	size = PAGE_SIZE*NR_PAGES;
+
+	printf("Allocating %d %s pages.\n", NR_PAGES, (do_uncache) ?
+					"uncacheable" : "cacheable");
+	if (do_uncache) {
+		int fd = open(UNCACHE_DEV, O_RDWR);
+		data = mmap(NULL, size, prot, flags, fd, 0);
+		close(fd);
+	}
+	else {
+		/* Accessed data will probably fit in L1, so this will go VERY fast.
+		   Code should also have little-to-no pipeline stalls. */
+		flags |= MAP_ANONYMOUS;
+		data = mmap(NULL, size, prot, flags, -1, 0);
+	}
+	if (data == MAP_FAILED) {
+		printf("Failed to alloc data! "
+			   "Are you running Litmus? "
+			   "Is Litmus broken?\n");
+		return -1;
+	}
+	else {
+		printf("Data allocated at %p.\n", data);
+	}
+
+	printf("Beginning tests...\n");
+	if (sched_setscheduler(getpid(), SCHED_FIFO, &fifo_params)) {
+		printf("(Could not become SCHED_FIFO task.) Are you running as root?\n");
+	}
+
+	/* observations suggest that no warmup phase is needed. */
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+	if (test(data, size, trials) != 0) {
+		printf("Test failed!\n");
+		munmap((char*)data, size);
+		return -1;
+	}
+	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+	elapsed = timespec_to_us(timespec_sub(end, start));
+	printf("%s Time: %ldus\n", (do_uncache) ?
+					"Uncache" : "Cache", elapsed);
+
+	munmap((char*)data, size);
+
+	if(time)
+		*time = elapsed;
+
+	return 0;
+}
+
+/* compares runtime of cached vs. uncached */
+int do_data_compare()
+{
+	const double thresh = 1.3;
+	int ret = 0;
+	double ratio;
+	int64_t cache_time = 0, uncache_time = 0;
+
+	printf("Timing cached pages...\n");
+	ret = do_data(0, &cache_time);
+	if (ret != 0)
+		goto out;
+
+	printf("Timing uncached pages...\n");
+	ret = do_data(1, &uncache_time);
+	if (ret != 0)
+		goto out;
+
+	ratio = (double)uncache_time/(double)cache_time;
+	printf("Uncached/Cached Ratio: %f\n", ratio);
+
+	if (ratio < thresh) {
+		printf("Ratio is unexpectedly small (< %f)! "
+				" Uncache broken? Are you on kvm?\n", thresh);
+		ret = -1;
+	}
+
+out:
+	return ret;
+}
+
+/* tries to max out uncache allocations.
+   under normal conditions (non-mlock),
+   pages should spill into swap. uncache
+   pages are not locked in memory. */
+int do_max_alloc(void)
+{
+	int fd;
+	int good = 1;
+	int count = 0;
+	uint64_t mmap_size = PAGE_SIZE; /* start at one page per mmap */
+
+	/* half of default limit on ubuntu. (see /proc/sys/vm/max_map_count) */
+	int max_mmaps = 32765;
+	volatile char** maps = calloc(max_mmaps, sizeof(pbuf_t));
+
+	if (!maps) {
+		printf("failed to alloc pointers for pages\n");
+		return -1;
+	}
+
+	printf("Testing max amount of uncache data. System may get wonkie (OOM Killer)!\n");
+
+	fd = open(UNCACHE_DEV, O_RDWR);
+	do {
+		int i;
+		int nr_pages = mmap_size/PAGE_SIZE;
+		printf("Testing mmaps of %d pages.\n", nr_pages);
+
+		count = 0;
+		for (i = 0; (i < max_mmaps) && good; ++i) {
+			pbuf_t data = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE, fd, 0);
+
+			if (data != MAP_FAILED) {
+				maps[i] = data;
+				++count;
+			}
+			else {
+				perror(NULL);
+				good = 0;
+			}
+		}
+		for (i = 0; i < count; ++i) {
+			if (maps[i])
+				munmap((char*)(maps[i]), mmap_size);
+		}
+		memset(maps, 0, sizeof(maps[0])*max_mmaps);
+
+		mmap_size *= 2; /* let's do it again with bigger allocations */
+	}while(good);
+
+	free(maps);
+	close(fd);
+
+	printf("Maxed out allocs with %d mmaps of %lu pages in size.\n",
+		count, mmap_size/PAGE_SIZE);
+
+	return 0;
+}
+
+typedef enum
+{
+	UNCACHE,
+	CACHE,
+	COMPARE,
+	MAX_ALLOC
+} test_t;
+
+#define OPTSTR "ucxa"
+int main(int argc, char** argv)
+{
+	int ret;
+	test_t test = UNCACHE;
+	int opt;
+	PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+
+	while((opt = getopt(argc, argv, OPTSTR)) != -1) {
+		switch(opt) {
+			case 'c':
+				test = CACHE;
+				break;
+			case 'u':
+				test = UNCACHE;
+				break;
+			case 'x':
+				test = COMPARE;
+				break;
+			case 'a':
+				test = MAX_ALLOC;
+				break;
+			case ':':
+				printf("missing option\n");
+				exit(-1);
+			case '?':
+			default:
+				printf("bad argument\n");
+				exit(-1);
+		}
+	}
+
+
+	printf("Page Size: %d\n", PAGE_SIZE);
+
+	switch(test)
+	{
+	case CACHE:
+		ret = do_data(0, NULL);
+		break;
+	case UNCACHE:
+		ret = do_data(1, NULL);
+		break;
+	case COMPARE:
+		ret = do_data_compare();
+		break;
+	case MAX_ALLOC:
+		ret = do_max_alloc();
+		break;
+	default:
+		printf("invalid test\n");
+		ret = -1;
+		break;
+	}
+
+	if (ret != 0) {
+		printf("Test failed.\n");
+	}
+
+	return ret;
+}
diff --git a/tests/fdso.c b/tests/fdso.c
index 8a2a0d0..fda343f 100644
--- a/tests/fdso.c
+++ b/tests/fdso.c
@@ -16,7 +16,7 @@ TESTCASE(fmlp_not_active, C_EDF | PFAIR | LINUX,
 {
 	int fd;
 
-	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	ASSERT(fd != -1);
 
@@ -57,7 +57,7 @@ TESTCASE(not_inherit_od, GSN_EDF | PSN_EDF,
 {
 	int fd, od, pid, status;
 
-	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( od = open_fmlp_sem(fd, 0) );
 
diff --git a/tests/locks.c b/tests/locks.c
index d7ebfe2..9a928b3 100644
--- a/tests/locks.c
+++ b/tests/locks.c
@@ -11,7 +11,7 @@ TESTCASE(not_lock_fmlp_be, GSN_EDF | PSN_EDF | P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( od = open_fmlp_sem(fd, 0) );
 
@@ -34,7 +34,7 @@ TESTCASE(not_lock_srp_be, PSN_EDF | P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	/* BE tasks may not open SRP semaphores */
 
@@ -51,7 +51,7 @@ TESTCASE(lock_srp, PSN_EDF | P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".srp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( sporadic_partitioned(10, 100, 0) );
 	SYSCALL( task_mode(LITMUS_RT_TASK) );
@@ -83,7 +83,7 @@ TESTCASE(lock_fmlp, PSN_EDF | GSN_EDF | P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".fmlp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( sporadic_partitioned(10, 100, 0) );
 	SYSCALL( task_mode(LITMUS_RT_TASK) );
diff --git a/tests/pcp.c b/tests/pcp.c
index 52ee959..ff4259c 100644
--- a/tests/pcp.c
+++ b/tests/pcp.c
@@ -13,7 +13,7 @@ TESTCASE(lock_pcp, P_FP,
 {
 	int fd, od, cpu = 0;
 
-	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( sporadic_partitioned(10, 100, cpu) );
 	SYSCALL( task_mode(LITMUS_RT_TASK) );
@@ -250,7 +250,7 @@ TESTCASE(lock_dpcp, P_FP,
 {
 	int fd, od, cpu = 1;
 
-	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( sporadic_partitioned(10, 100, 0) );
 	SYSCALL( task_mode(LITMUS_RT_TASK) );
@@ -281,7 +281,7 @@ TESTCASE(not_lock_pcp_be, P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	/* BE tasks are not even allowed to open a PCP semaphore */
 	SYSCALL_FAILS(EPERM, od = open_pcp_sem(fd, 0, 1) );
@@ -303,7 +303,7 @@ TESTCASE(lock_mpcp, P_FP,
 {
 	int fd, od;
 
-	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT) );
+	SYSCALL( fd = open(".pcp_locks", O_RDONLY | O_CREAT, S_IRUSR) );
 
 	SYSCALL( sporadic_partitioned(10, 100, 0) );
 	SYSCALL( task_mode(LITMUS_RT_TASK) );
-- 
cgit v1.2.2