summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2022-09-12 20:35:31 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2022-09-12 20:45:46 -0400
commit909c37eeaa8060d983de8446540f091ed0d14a4d (patch)
tree39ef5e88d1ea1cdbee401fcc3ed11cd548082817
parenta6286e09f4a3c78522a12b3d55b53ef1245bf558 (diff)
Clean up and split demand_paging_speed.c from paging_speed.c
This also removes the --no-seq flag from paging_speed.c, which never worked. Cleanups include: - Copyright notices - Better usage messages & code comments - More similar structure in the CPU-side benchmarks - Random disk buffer initialization in demand_paging_speed.c - Output comments detailing data source - Stripping of unused or deprecated codepaths At this point, the mapping of benchmarks to RTSS'22 plots is: - paging_speed: Fig. 4 - demand_paging_speed: Fig. 10 - directio_paging_speed: Fig. 10 - gpu_paging_speed: Fig. 10 - gpu_paging_overhead_speed: Fig. 11 - gpu_paging_evil_task: Figs. 12 and 13
-rw-r--r--demand_paging_speed.c153
-rw-r--r--directio_paging_speed.c47
-rw-r--r--gpu_paging_evil_task.cu11
-rw-r--r--gpu_paging_overhead_speed.cu4
-rw-r--r--gpu_paging_speed.cu4
-rw-r--r--paging_speed.c160
6 files changed, 288 insertions, 91 deletions
diff --git a/demand_paging_speed.c b/demand_paging_speed.c
new file mode 100644
index 0000000..22c3942
--- /dev/null
+++ b/demand_paging_speed.c
@@ -0,0 +1,153 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 * This program clocks how long it takes to page-fault in a 1GiB buffer, making
4 * efforts to exclude userspace overheads.
5 *
6 * More precisely, this program clocks:
7 * mmap(big_buffer);
8 * sequentially_walk(big_buffer);
9 * where `big_buffer` is randomly filled. It then clocks another:
10 * sequentially_walk(big_buffer);
11 * subtracts that time from the first one, and outputs the result. /dev/nvme0n1
12 * is preinitialized with random non-zero bytes.
13 */
14#define _GNU_SOURCE
15
16#include <assert.h>
17#include <fcntl.h>
18#include <stdint.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <sys/mman.h>
22#include <sys/stat.h>
23#include <sys/types.h>
24#include <time.h>
25#include <unistd.h>
26
27#define GiB 1024l*1024l*1024l
28#define s2ns(s) ((s)*1000l*1000l*1000l)
29#define ns2us(ns) ((ns)/1000l)
30#define PAGED_FILE "/dev/nvme0n1"
31#define CLEAR_PAGECACHE_DENTRIES_INODES "3"
32int max(int x, int y) {return x > y ? x : y;}
33
34int seq_walk(char* mem, int len, char to_find) {
35 int num_42 = 0;
36 // Stride of 4096 bytes (one 4k page)
37 for (int i = 4096; i < len; i += 4096)
38 if (mem[i] == to_find)
39 num_42++;
40 return num_42;
41}
42
43// Original function from copy_only.cu
44void fill_rand(char* buf, uint64_t buf_len) {
45 uint64_t i = 0;
46 for (; i < buf_len; i++)
47 buf[i] = max((rand() & 0xff), 1);
48}
49
50uint64_t count_zero(char* buf, uint64_t buf_len) {
51 uint64_t i = 0;
52 uint64_t num_zeros = 0;
53 for (; i < buf_len; i++)
54 num_zeros += (!buf[i]);
55 return num_zeros;
56}
57
58// Subtract first parameter from second parameter. Return as nanoseconds.
59long time_diff_ns(struct timespec start, struct timespec stop) {
60 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
61}
62
63int main(int argc, char **argv) {
64 struct timespec start, stop, stop2;
65 int iters, res;
66
67 if (argc != 2 || argv[1][0] == '-') {
68 fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]);
69 return 1;
70 }
71 iters = atoi(argv[1]);
72
73 // If output is redirected, add comment with source details
74 if (!isatty(fileno(stdout)))
75 fprintf(stdout, "# Generated by '%s %s'\n", argv[0], argv[1]);
76
77 // Open control device for use to reset between iterations any caches
78 // that the demand paging system might use.
79 int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
80 if (clear_fd == -1) {
81 perror("Unable to open /proc/sys/vm/drop_caches");
82 return 1;
83 }
84
85 // Fill the area that we'll read from with random data
86 // (Direct I/O just used here for convenience.)
87 char *mem_in, *mem_out; // In and out of SSD
88 int fd = open(PAGED_FILE, O_RDWR | O_DIRECT | O_SYNC);
89 if (fd == -1) {
90 perror("Unable to open " PAGED_FILE);
91 return 1;
92 }
93 // Aligned malloc(GiB) basicially
94 res = posix_memalign((void**)&mem_in, 4096, GiB);
95 fill_rand(mem_in, GiB);
96 res = write(fd, mem_in, GiB);
97 if (res == -1) {
98 perror("Unable to write inital 1GiB random buffer to " PAGED_FILE);
99 return 1;
100 }
101 if (res != GiB) {
102 fprintf(stderr, "Unable to write the buffer all at once!");
103 return 2;
104 }
105 free(mem_in);
106 close(fd);
107
108 // Output table header. One read sample per row.
109 printf("in (us)\n");
110 // Perform iterations of demand paging in
111 for (int i = 0; i < iters; i++) {
112 int fd = open(PAGED_FILE, O_RDWR);
113 if (fd == -1) {
114 perror("Unable to open " PAGED_FILE);
115 return 1;
116 }
117 // Clear page cache
118 write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1);
119
120 // Begin paging by mapping the file into (unbacked) virtual memory
121 clock_gettime(CLOCK_MONOTONIC_RAW, &start);
122 mem_out = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0);
123 if (mem_out == MAP_FAILED) {
124 perror("Unable to mmap " PAGED_FILE);
125 return 1;
126 }
127 // Page fault in all the data via a sequential walk
128 res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer
129 assert(res > 0);
130 clock_gettime(CLOCK_MONOTONIC_RAW, &stop);
131
132 // This benchmark attempts to capture data comparable to the
133 // GPU or Direct I/O paging benchmarks. As they don't have to
134 // perform a sequential walk, we emulate that here by measuring
135 // how long a sequential walk takes on its own, and subtract
136 // that from `stop - start`.
137 res = seq_walk(mem_out, GiB, 7); // Made up work to fool optimizer
138 assert(res > 0);
139 clock_gettime(CLOCK_MONOTONIC_RAW, &stop2);
140
141 // `stop2 - stop` is how long just a sequential walk takes
142 long seq_walk_time = time_diff_ns(stop, stop2);
143 // `stop - start` is how long the faulting sequential walk took
144 long demand_paging_raw_duration = time_diff_ns(start, stop);
145 // `(stop - start) - (stop2 - stop)` is emulated duration
146 long demand_paging_duration = demand_paging_raw_duration - seq_walk_time;
147 printf("%ld\n", ns2us(demand_paging_duration));
148
149 munmap(mem_out, GiB);
150 close(fd);
151 }
152 return 0;
153}
diff --git a/directio_paging_speed.c b/directio_paging_speed.c
index b0a01d3..9dd6598 100644
--- a/directio_paging_speed.c
+++ b/directio_paging_speed.c
@@ -1,19 +1,34 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 * This program clocks how long it takes to read, and write, a 1GiB buffer via
4 * Linux direct I/O.
5 *
6 * More precisely, this program clocks:
7 * write(random_buffer);
8 * free(random_buffer);
9 * for writing data via direct I/O, and
10 * malloc(big_buffer);
11 * read(big_buffer);
12 * for reading in data via direct I/O. `random_buffer` is a preinitialized 1GiB
13 * buffer of random non-zero bytes.
14 */
1#define _GNU_SOURCE 15#define _GNU_SOURCE
2 16
3#include <sys/mman.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <fcntl.h> 17#include <fcntl.h>
7#include <stdio.h>
8#include <stdint.h> 18#include <stdint.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <sys/mman.h>
22#include <sys/stat.h>
23#include <sys/types.h>
9#include <time.h> 24#include <time.h>
10#include <unistd.h> 25#include <unistd.h>
11#include <stdlib.h>
12 26
13#define GiB 1024l*1024l*1024l 27#define GiB 1024l*1024l*1024l
14#define s2ns(s) ((s)*1000l*1000l*1000l) 28#define s2ns(s) ((s)*1000l*1000l*1000l)
15#define ns2us(ns) ((ns)/1000l) 29#define ns2us(ns) ((ns)/1000l)
16#define PAGED_FILE "/dev/nvme0n1" 30#define PAGED_FILE "/dev/nvme0n1"
31#define CLEAR_PAGECACHE_DENTRIES_INODES "3"
17int max(int x, int y) {return x > y ? x : y;} 32int max(int x, int y) {return x > y ? x : y;}
18 33
19// Original function from copy_only.cu 34// Original function from copy_only.cu
@@ -31,16 +46,24 @@ uint64_t count_zero(char* buf, uint64_t buf_len) {
31 return num_zeros; 46 return num_zeros;
32} 47}
33 48
49// Subtract first parameter from second parameter. Return as nanoseconds.
34long time_diff_ns(struct timespec start, struct timespec stop) { 50long time_diff_ns(struct timespec start, struct timespec stop) {
35 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 51 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
36} 52}
37 53
38int main(int argc, char **argv) { 54int main(int argc, char **argv) {
39 struct timespec out_start, out_stop, in_start, in_stop; 55 struct timespec out_start, out_stop, in_start, in_stop;
40 int iters = 1; 56 int iters, res;
41 int res; 57
42 if (argc > 1) 58 if (argc != 2 || argv[1][0] == '-') {
43 iters = atoi(argv[1]); 59 fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]);
60 return 1;
61 }
62 iters = atoi(argv[1]);
63
64 // If output is redirected, add comment with source details
65 if (!isatty(fileno(stdout)))
66 fprintf(stdout, "# Generated by '%s %s'\n", argv[0], argv[1]);
44 67
45 // Needed to allow page cache clearing between iterations 68 // Needed to allow page cache clearing between iterations
46 // Note: Shouldn't be needed with O_DIRECT, but include it just in case 69 // Note: Shouldn't be needed with O_DIRECT, but include it just in case
@@ -49,8 +72,8 @@ int main(int argc, char **argv) {
49 perror("Unable to open /proc/sys/vm/drop_caches"); 72 perror("Unable to open /proc/sys/vm/drop_caches");
50 return 1; 73 return 1;
51 } 74 }
52 char clear_cmd = '3';
53 75
76 // Print table header. One read, one write sample per following row
54 printf("out (us)\tin (us)\n"); 77 printf("out (us)\tin (us)\n");
55 for (int i = 0; i < iters; i++) { 78 for (int i = 0; i < iters; i++) {
56 char *mem_in, *mem_out; 79 char *mem_in, *mem_out;
@@ -60,7 +83,7 @@ int main(int argc, char **argv) {
60 return 1; 83 return 1;
61 } 84 }
62 // Clear page cache 85 // Clear page cache
63 write(clear_fd, &clear_cmd, 1); 86 write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1);
64 // Allocate and fill a buffer with random data 87 // Allocate and fill a buffer with random data
65 // Aligned malloc(GiB) basicially 88 // Aligned malloc(GiB) basicially
66 res = posix_memalign((void**)&mem_in, 4096, GiB); 89 res = posix_memalign((void**)&mem_in, 4096, GiB);
@@ -81,7 +104,7 @@ int main(int argc, char **argv) {
81 } 104 }
82 105
83 sleep(1); // Supposedly some other work would happen here 106 sleep(1); // Supposedly some other work would happen here
84 write(clear_fd, &clear_cmd, 1); // Just in case O_DIRECT misbehaves 107 write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); // Just in case O_DIRECT misbehaves
85 res = lseek(fd, 0, SEEK_SET); // Reposition offset 108 res = lseek(fd, 0, SEEK_SET); // Reposition offset
86 if (res == -1) { 109 if (res == -1) {
87 perror("Unable to seek to offset 0 in " PAGED_FILE); 110 perror("Unable to seek to offset 0 in " PAGED_FILE);
diff --git a/gpu_paging_evil_task.cu b/gpu_paging_evil_task.cu
index 7c1ab59..4b358a4 100644
--- a/gpu_paging_evil_task.cu
+++ b/gpu_paging_evil_task.cu
@@ -1,3 +1,9 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 *
4 * Run many iterations of GPU paging on the same CUDA context, and print the
5 * average time per iteration.
6 */
1#include <stdio.h> 7#include <stdio.h>
2#include <cuda.h> 8#include <cuda.h>
3#include <curand_kernel.h> // curandState_t and curand 9#include <curand_kernel.h> // curandState_t and curand
@@ -78,6 +84,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) {
78 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); 84 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero);
79} 85}
80 86
87// Subtract first parameter from second parameter. Return as nanoseconds.
81long time_diff_ns(struct timespec start, struct timespec stop) { 88long time_diff_ns(struct timespec start, struct timespec stop) {
82 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 89 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
83} 90}
@@ -89,8 +96,8 @@ int main(int argc, char **argv) {
89 cudaStream_t stream1; 96 cudaStream_t stream1;
90 cudaError_t err; 97 cudaError_t err;
91 int iters; 98 int iters;
92 if (argc != 2) { 99 if (argc != 2 || argv[1][0] == '-') {
93 fprintf(stderr, "Usage: %s <iterations>\n", argv[0]); 100 fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]);
94 return 1; 101 return 1;
95 } 102 }
96 iters = atoi(argv[1]); 103 iters = atoi(argv[1]);
diff --git a/gpu_paging_overhead_speed.cu b/gpu_paging_overhead_speed.cu
index c7a0f3a..0cf364e 100644
--- a/gpu_paging_overhead_speed.cu
+++ b/gpu_paging_overhead_speed.cu
@@ -1,3 +1,6 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 */
1#include <stdio.h> 4#include <stdio.h>
2#include <cuda.h> 5#include <cuda.h>
3#include <curand_kernel.h> // curandState_t and curand 6#include <curand_kernel.h> // curandState_t and curand
@@ -78,6 +81,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) {
78 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); 81 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero);
79} 82}
80 83
84// Subtract first parameter from second parameter. Return as nanoseconds.
81long time_diff_ns(struct timespec start, struct timespec stop) { 85long time_diff_ns(struct timespec start, struct timespec stop) {
82 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 86 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
83} 87}
diff --git a/gpu_paging_speed.cu b/gpu_paging_speed.cu
index 72cb82e..bca02d9 100644
--- a/gpu_paging_speed.cu
+++ b/gpu_paging_speed.cu
@@ -1,3 +1,6 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 */
1#include <stdio.h> 4#include <stdio.h>
2#include <cuda.h> 5#include <cuda.h>
3#include <curand_kernel.h> // curandState_t and curand 6#include <curand_kernel.h> // curandState_t and curand
@@ -78,6 +81,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) {
78 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); 81 atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero);
79} 82}
80 83
84// Subtract first parameter from second parameter. Return as nanoseconds.
81long time_diff_ns(struct timespec start, struct timespec stop) { 85long time_diff_ns(struct timespec start, struct timespec stop) {
82 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 86 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
83} 87}
diff --git a/paging_speed.c b/paging_speed.c
index 4ad56e2..7b0cd22 100644
--- a/paging_speed.c
+++ b/paging_speed.c
@@ -1,19 +1,37 @@
1/**
2 * Copyright 2022 Joshua Bakita
3 * This program clocks how long it takes to read in a 1Gib buffer via page
4 * faulting and via direct I/O, with efforts to match the overheads.
5 *
6 * More precisely, this program clocks:
7 * mmap(big_buffer);
8 * sequentially_walk(big_buffer);
9 * for demand paging, and:
10 * malloc(big_buffer);
11 * read(big_buffer);
12 * walk(big_buffer);
13 * for direct I/O. `big_buffer` is the arbitrary bytes contained in the first
14 * 1GiB of /dev/nvme0n1, and should be filled with random data prior to
15 * executing this microbenchmark.
16 */
1#define _GNU_SOURCE 17#define _GNU_SOURCE
2 18
3#include <sys/mman.h> 19#include <assert.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <fcntl.h> 20#include <fcntl.h>
7#include <stdio.h>
8#include <stdint.h> 21#include <stdint.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <sys/mman.h>
25#include <sys/stat.h>
26#include <sys/types.h>
9#include <time.h> 27#include <time.h>
10#include <unistd.h> 28#include <unistd.h>
11#include <stdlib.h>
12#include <string.h> // strlen()
13 29
14#define GiB 1024l*1024l*1024l 30#define GiB 1024l*1024l*1024l
15#define s2ns(s) ((s)*1000l*1000l*1000l) 31#define s2ns(s) ((s)*1000l*1000l*1000l)
16 32#define ns2us(ns) ((ns)/1000l)
33#define PAGED_FILE "/dev/nvme0n1"
34#define CLEAR_PAGECACHE_DENTRIES_INODES "3"
17 35
18int seq_walk(char* mem, int len, char to_find) { 36int seq_walk(char* mem, int len, char to_find) {
19 int num_42 = 0; 37 int num_42 = 0;
@@ -24,91 +42,82 @@ int seq_walk(char* mem, int len, char to_find) {
24 return num_42; 42 return num_42;
25} 43}
26 44
45// Subtract first parameter from second parameter. Return as nanoseconds.
27long time_diff_ns(struct timespec start, struct timespec stop) { 46long time_diff_ns(struct timespec start, struct timespec stop) {
28 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 47 return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec);
29} 48}
30// TODO: take *num_42, return time
31
32//#define PAGED_FILE "/home/jbakita/1gib_random_f"
33#define PAGED_FILE "/dev/nvme0n1"
34 49
35int main(int argc, char **argv) { 50int main(int argc, char **argv) {
36 int iters = 1; 51 struct timespec start, stop;
37 int no_seq = 0; 52 int iters, res;
38 if (argc > 1) 53 char* mem_out;
39 iters = atoi(argv[1]); 54
40 if (argc > 2) { 55 if (argc != 2 || argv[1][0] == '-') {
41 no_seq = strncmp(argv[2], "--no-seq", strlen(argv[2])) ? 1 : 0; 56 fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]);
42 fprintf(stderr, "Skipping seq, but using no-seq emulation with demand paging\n"); 57 return 1;
43 } 58 }
44 struct timespec start, stop, seq_stop; 59 iters = atoi(argv[1]);
60
61 // Add comment with source details
62 fprintf(stdout, "# Generated by '%s %s'. Row 1 is demand paging, row 2 is direct I/O.\n", argv[0], argv[1]);
63
64 // Open control device for use to reset between iterations any caches
65 // that the demand paging system might use.
45 int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY); 66 int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
46 if (clear_fd == -1) { 67 if (clear_fd == -1) {
47 perror("Unable to open /proc/sys/vm/drop_caches"); 68 perror("Unable to open /proc/sys/vm/drop_caches");
48 return 1; 69 return 1;
49 } 70 }
50 71
51 char clear_cmd = '3'; 72 // Perform iterations of demand paging in
52 for (int i = 0; i < iters; i++) { 73 for (int i = 0; i < iters; i++) {
53 int fd = open(PAGED_FILE, O_RDWR); 74 int fd = open(PAGED_FILE, O_RDWR);
54 if (fd == -1) { 75 if (fd == -1) {
55 perror("Unable to open " PAGED_FILE); 76 perror("Unable to open " PAGED_FILE);
56 return 1; 77 return 1;
57 } 78 }
58 // Clear page cache 79 // Clear page cache
59 write(clear_fd, &clear_cmd, 1); 80 write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1);
60 // VIA MMAP 81
61 clock_gettime(CLOCK_MONOTONIC_RAW, &start); 82 // Begin paging by mapping the file into (unbacked) virtual memory
62 char* mem = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0); 83 clock_gettime(CLOCK_MONOTONIC_RAW, &start);
63 if (mem == MAP_FAILED) { 84 mem_out = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0);
64 perror("Unable to mmap " PAGED_FILE); 85 if (mem_out == MAP_FAILED) {
65 return 1; 86 perror("Unable to mmap " PAGED_FILE);
66 } 87 return 1;
67 // Fault on all the pages via a sequential walk 88 }
68 int num_42 = seq_walk(mem, GiB, 42); 89 // Page fault in all the data via a sequential walk
69 clock_gettime(CLOCK_MONOTONIC_RAW, &stop); 90 res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer
70 int num_52 = 0; 91 assert(res > 0);
71 if (no_seq) 92 clock_gettime(CLOCK_MONOTONIC_RAW, &stop);
72 num_52 = seq_walk(mem, GiB, 52); 93
73 clock_gettime(CLOCK_MONOTONIC_RAW, &seq_stop); 94 long duration = time_diff_ns(start, stop);
74 if (num_52) 95 printf("%ld, ", ns2us(duration));
75 fprintf(stderr, "Something is seriously wrong! Found a 52 in a buffer that should be only 42s\n"); 96
76 long duration = (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); 97 munmap(mem_out, GiB);
77 // Emulate the time demand paging would take if we didn't have to walk
78 if (no_seq) {
79 long seq_time = time_diff_ns(stop, seq_stop);
80 duration -= seq_time;
81 }
82 if (iters == 1) {
83 printf("Took %ldus via mmap\n", duration / 1000);
84 printf("Read %d 42s of %ld expected\n", num_42, GiB/4096);
85 } else {
86 printf("%ld, ", duration / 1000);
87 }
88 munmap(mem, GiB);
89 close(fd); 98 close(fd);
90 } 99 }
91 if (iters > 1) 100 printf("\n");
92 printf("\n");
93 101
102 // Perform iterations of paging in via direct I/O
94 for (int i = 0; i < iters; i++) { 103 for (int i = 0; i < iters; i++) {
95 char* mem;
96 int fd = open(PAGED_FILE, O_RDWR | O_DIRECT); 104 int fd = open(PAGED_FILE, O_RDWR | O_DIRECT);
97 if (fd == -1) { 105 if (fd == -1) {
98 perror("Unable to open " PAGED_FILE); 106 perror("Unable to open " PAGED_FILE);
99 return 1; 107 return 1;
100 } 108 }
101 // Clear page cache 109 // Clear page cache
102 write(clear_fd, &clear_cmd, 1); 110 write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1);
103 // VIA READ 111
112 // Load in buffer via direct I/O
104 clock_gettime(CLOCK_MONOTONIC_RAW, &start); 113 clock_gettime(CLOCK_MONOTONIC_RAW, &start);
105 // Aligned malloc(GiB) basicially 114 // Aligned malloc(GiB) basicially
106 int res = posix_memalign((void**)&mem, 4096, GiB); 115 res = posix_memalign((void**)&mem_out, 4096, GiB);
107 if (res) { 116 if (res) {
108 fprintf(stderr, "posix_memalign() failure. Error %d.", res); 117 fprintf(stderr, "posix_memalign() failure. Error %d.", res);
109 return 1; 118 return 1;
110 } 119 }
111 res = read(fd, mem, GiB); 120 res = read(fd, mem_out, GiB);
112 if (res == -1) { 121 if (res == -1) {
113 perror("Unable to read 1GiB from /dev/nvme0n1"); 122 perror("Unable to read 1GiB from /dev/nvme0n1");
114 return 1; 123 return 1;
@@ -117,21 +126,18 @@ int main(int argc, char **argv) {
117 fprintf(stderr, "Unable to read the buffer all at once!"); 126 fprintf(stderr, "Unable to read the buffer all at once!");
118 return 2; 127 return 2;
119 } 128 }
120 int num_42 = 0; 129 // This sequential walk is not strictly necessary, but is
121 if (!no_seq) 130 // included to match the overheads of the demand paging path
122 num_42 = seq_walk(mem, GiB, 42); // Not strictly necessary, but to match mmap path overheads 131 res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer
132 assert(res);
123 clock_gettime(CLOCK_MONOTONIC_RAW, &stop); 133 clock_gettime(CLOCK_MONOTONIC_RAW, &stop);
124 if (iters == 1) { 134
125 printf("Took %ldus via read\n", ((s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec)) / 1000); 135 long duration = time_diff_ns(start, stop);
126 if (!no_seq) 136 printf("%ld, ", ns2us(duration));
127 printf("Read %d 42s of %ld expected\n", num_42, GiB/4096); 137
128 } else {
129 printf("%ld, ", ((s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec)) / 1000);
130 }
131 close(fd); 138 close(fd);
132 free(mem); 139 free(mem_out);
133 } 140 }
134 if (iters > 1) 141 printf("\n");
135 printf("\n");
136 return 0; 142 return 0;
137} 143}