#include #include #include #include #include #include #include #include #include #include #include /* Test tool for validating Litmus's uncache device. */ /* Tool also capable basic cache vs. sysmem statistics. */ /* Compile with '-O2' for significaintly greater margins */ /* in performance between cache and sysmem: */ /* (Intel Xeon X5650) */ /* -g -> uncache is 30x slower */ /* -O2 -> uncache is >100x slower */ int PAGE_SIZE; #define NR_PAGES 16 #define UNCACHE_DEV "/dev/litmus/uncache" /* volatile forces a read from memory (or cache) on every reference. Note that volatile does not keep data out of the cache! */ typedef volatile char* pbuf_t; /* hit the first byte in each page. addr must be page aligned. */ inline int linear_write(pbuf_t addr, int size, char val) { pbuf_t end = addr + size; pbuf_t step; int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE; int times = nr_pages * PAGE_SIZE; int i; for (i = 0; i < times; ++i) for(step = addr; step < end; step += PAGE_SIZE) *step = val; return 0; } inline int linear_read(pbuf_t addr, int size, char val) { pbuf_t end = addr + size; pbuf_t step; int nr_pages = (unsigned long)(end - addr)/PAGE_SIZE; int times = nr_pages * PAGE_SIZE; int i; for (i = 0; i < times; ++i) for(step = addr; step < end; step += PAGE_SIZE) { if (*step != val) return -1; } return 0; } /* write to *data nr times. */ inline int hammer_write(pbuf_t data, char val, int nr) { int i; for (i = 0; i < nr; ++i) *data = val; return 0; } /* read from *data nr times. */ inline int hammer_read(pbuf_t data, char val, int nr) { int i; for (i = 0; i < nr; ++i) { if (*data != val) return -1; } return 0; } inline int test(pbuf_t data, int size, int trials) { int HAMMER_TIME = 10000; /* can't cache this! */ char VAL = 0x55; int t; for(t = 0; t < trials; ++t) { #if 0 if (linear_write(data, size, VAL) != 0) { printf("failed linear_write()\n"); return -1; } if (linear_read(data, size, VAL) != 0) { printf("failed linear_read()\n"); return -1; } #endif /* hammer at the first byte in the array */ if (hammer_write(data, VAL, HAMMER_TIME) != 0) { printf("failed hammer_write()\n"); return -1; } if (hammer_read(data, VAL, HAMMER_TIME) != 0) { printf("failed hammer_read()\n"); return -1; } } return 0; } inline void timespec_normalize(struct timespec* ts, time_t sec, int64_t nsec) { while(nsec > 1000000000LL) { asm("" : "+rm"(nsec)); nsec -= 1000000000LL; ++sec; } while(nsec < 0) { asm("" : "+rm"(nsec)); nsec += 1000000000LL; --sec; } ts->tv_sec = sec; ts->tv_nsec = nsec; } inline struct timespec timespec_sub(struct timespec lhs, struct timespec rhs) { struct timespec delta; timespec_normalize(&delta, lhs.tv_sec - rhs.tv_sec, lhs.tv_nsec - rhs.tv_nsec); return delta; } inline struct timespec timespec_add(struct timespec lhs, struct timespec rhs) { struct timespec delta; timespec_normalize(&delta, lhs.tv_sec + rhs.tv_sec, lhs.tv_nsec + rhs.tv_nsec); return delta; } inline int64_t timespec_to_us(struct timespec ts) { int64_t t; t = ts.tv_sec * 1000000LL; t += ts.tv_nsec / 1000LL; return t; } /* hammers away at the first byte in each mmaped page and times how long it took. */ int do_data(int do_uncache, int64_t* time) { int size; int prot = PROT_READ | PROT_WRITE; int flags = MAP_PRIVATE; pbuf_t data; struct sched_param fifo_params; struct timespec start, end; int64_t elapsed; int trials = 1000; printf("Running data access test.\n"); mlockall(MCL_CURRENT | MCL_FUTURE); memset(&fifo_params, 0, sizeof(fifo_params)); fifo_params.sched_priority = sched_get_priority_max(SCHED_FIFO); size = PAGE_SIZE*NR_PAGES; printf("Allocating %d %s pages.\n", NR_PAGES, (do_uncache) ? "uncacheable" : "cacheable"); if (do_uncache) { int fd = open(UNCACHE_DEV, O_RDWR); data = mmap(NULL, size, prot, flags, fd, 0); close(fd); } else { /* Accessed data will probably fit in L1, so this will go VERY fast. Code should also have little-to-no pipeline stalls. */ flags |= MAP_ANONYMOUS; data = mmap(NULL, size, prot, flags, -1, 0); } if (data == MAP_FAILED) { printf("Failed to alloc data! " "Are you running Litmus? " "Is Litmus broken?\n"); return -1; } else { printf("Data allocated at %p.\n", data); } printf("Beginning tests...\n"); if (sched_setscheduler(getpid(), SCHED_FIFO, &fifo_params)) { printf("(Could not become SCHED_FIFO task.) Are you running as root?\n"); } /* observations suggest that no warmup phase is needed. */ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); if (test(data, size, trials) != 0) { printf("Test failed!\n"); munmap((char*)data, size); return -1; } clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); elapsed = timespec_to_us(timespec_sub(end, start)); printf("%s Time: %"PRIi64"us\n", (do_uncache) ? "Uncache" : "Cache", elapsed); munmap((char*)data, size); if(time) *time = elapsed; return 0; } /* compares runtime of cached vs. uncached */ int do_data_compare() { const double thresh = 1.3; int ret = 0; double ratio; int64_t cache_time = 0, uncache_time = 0; printf("Timing cached pages...\n"); ret = do_data(0, &cache_time); if (ret != 0) goto out; printf("Timing uncached pages...\n"); ret = do_data(1, &uncache_time); if (ret != 0) goto out; ratio = (double)uncache_time/(double)cache_time; printf("Uncached/Cached Ratio: %f\n", ratio); if (ratio < thresh) { printf("Ratio is unexpectedly small (< %f)! " " Uncache broken? Are you on kvm?\n", thresh); ret = -1; } out: return ret; } /* tries to max out uncache allocations. under normal conditions (non-mlock), pages should spill into swap. uncache pages are not locked in memory. */ int do_max_alloc(void) { int fd; int good = 1; int count = 0; uint64_t mmap_size = PAGE_SIZE; /* start at one page per mmap */ /* half of default limit on ubuntu. (see /proc/sys/vm/max_map_count) */ int max_mmaps = 32765; volatile char** maps = calloc(max_mmaps, sizeof(pbuf_t)); if (!maps) { printf("failed to alloc pointers for pages\n"); return -1; } printf("Testing max amount of uncache data. System may get wonkie (OOM Killer)!\n"); fd = open(UNCACHE_DEV, O_RDWR); do { int i; int nr_pages = mmap_size/PAGE_SIZE; printf("Testing mmaps of %d pages.\n", nr_pages); count = 0; for (i = 0; (i < max_mmaps) && good; ++i) { pbuf_t data = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE, fd, 0); if (data != MAP_FAILED) { maps[i] = data; ++count; } else { perror(NULL); good = 0; } } for (i = 0; i < count; ++i) { if (maps[i]) munmap((char*)(maps[i]), mmap_size); } memset(maps, 0, sizeof(maps[0])*max_mmaps); mmap_size *= 2; /* let's do it again with bigger allocations */ }while(good); free(maps); close(fd); printf("Maxed out allocs with %d mmaps of %"PRIu64" pages in size.\n", count, mmap_size/PAGE_SIZE); return 0; } typedef enum { UNCACHE, CACHE, COMPARE, MAX_ALLOC } test_t; #define OPTSTR "ucxa" int main(int argc, char** argv) { int ret; test_t test = UNCACHE; int opt; PAGE_SIZE = sysconf(_SC_PAGE_SIZE); while((opt = getopt(argc, argv, OPTSTR)) != -1) { switch(opt) { case 'c': test = CACHE; break; case 'u': test = UNCACHE; break; case 'x': test = COMPARE; break; case 'a': test = MAX_ALLOC; break; case ':': printf("missing option\n"); exit(-1); case '?': default: printf("bad argument\n"); exit(-1); } } printf("Page Size: %d\n", PAGE_SIZE); switch(test) { case CACHE: ret = do_data(0, NULL); break; case UNCACHE: ret = do_data(1, NULL); break; case COMPARE: ret = do_data_compare(); break; case MAX_ALLOC: ret = do_max_alloc(); break; default: printf("invalid test\n"); ret = -1; break; } if (ret != 0) { printf("Test failed.\n"); } return ret; }