diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2022-09-12 20:35:31 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2022-09-12 20:45:46 -0400 |
commit | 909c37eeaa8060d983de8446540f091ed0d14a4d (patch) | |
tree | 39ef5e88d1ea1cdbee401fcc3ed11cd548082817 | |
parent | a6286e09f4a3c78522a12b3d55b53ef1245bf558 (diff) |
Clean up and split demand_paging_speed.c from paging_speed.c
This also removes the --no-seq flag from paging_speed.c, which
never worked.
Cleanups include:
- Copyright notices
- Better usage messages & code comments
- More similar structure in the CPU-side benchmarks
- Random disk buffer initialization in demand_paging_speed.c
- Output comments detailing data source
- Stripping of unused or deprecated codepaths
At this point, the mapping of benchmarks to RTSS'22 plots is:
- paging_speed: Fig. 4
- demand_paging_speed: Fig. 10
- directio_paging_speed: Fig. 10
- gpu_paging_speed: Fig. 10
- gpu_paging_overhead_speed: Fig. 11
- gpu_paging_evil_task: Figs. 12 and 13
-rw-r--r-- | demand_paging_speed.c | 153 | ||||
-rw-r--r-- | directio_paging_speed.c | 47 | ||||
-rw-r--r-- | gpu_paging_evil_task.cu | 11 | ||||
-rw-r--r-- | gpu_paging_overhead_speed.cu | 4 | ||||
-rw-r--r-- | gpu_paging_speed.cu | 4 | ||||
-rw-r--r-- | paging_speed.c | 160 |
6 files changed, 288 insertions, 91 deletions
diff --git a/demand_paging_speed.c b/demand_paging_speed.c new file mode 100644 index 0000000..22c3942 --- /dev/null +++ b/demand_paging_speed.c | |||
@@ -0,0 +1,153 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | * This program clocks how long it takes to page-fault in a 1GiB buffer, making | ||
4 | * efforts to exclude userspace overheads. | ||
5 | * | ||
6 | * More precisely, this program clocks: | ||
7 | * mmap(big_buffer); | ||
8 | * sequentially_walk(big_buffer); | ||
9 | * where `big_buffer` is randomly filled. It then clocks another: | ||
10 | * sequentially_walk(big_buffer); | ||
11 | * subtracts that time from the first one, and outputs the result. /dev/nvme0n1 | ||
12 | * is preinitialized with random non-zero bytes. | ||
13 | */ | ||
14 | #define _GNU_SOURCE | ||
15 | |||
16 | #include <assert.h> | ||
17 | #include <fcntl.h> | ||
18 | #include <stdint.h> | ||
19 | #include <stdio.h> | ||
20 | #include <stdlib.h> | ||
21 | #include <sys/mman.h> | ||
22 | #include <sys/stat.h> | ||
23 | #include <sys/types.h> | ||
24 | #include <time.h> | ||
25 | #include <unistd.h> | ||
26 | |||
27 | #define GiB 1024l*1024l*1024l | ||
28 | #define s2ns(s) ((s)*1000l*1000l*1000l) | ||
29 | #define ns2us(ns) ((ns)/1000l) | ||
30 | #define PAGED_FILE "/dev/nvme0n1" | ||
31 | #define CLEAR_PAGECACHE_DENTRIES_INODES "3" | ||
32 | int max(int x, int y) {return x > y ? x : y;} | ||
33 | |||
34 | int seq_walk(char* mem, int len, char to_find) { | ||
35 | int num_42 = 0; | ||
36 | // Stride of 4096 bytes (one 4k page) | ||
37 | for (int i = 4096; i < len; i += 4096) | ||
38 | if (mem[i] == to_find) | ||
39 | num_42++; | ||
40 | return num_42; | ||
41 | } | ||
42 | |||
43 | // Original function from copy_only.cu | ||
44 | void fill_rand(char* buf, uint64_t buf_len) { | ||
45 | uint64_t i = 0; | ||
46 | for (; i < buf_len; i++) | ||
47 | buf[i] = max((rand() & 0xff), 1); | ||
48 | } | ||
49 | |||
50 | uint64_t count_zero(char* buf, uint64_t buf_len) { | ||
51 | uint64_t i = 0; | ||
52 | uint64_t num_zeros = 0; | ||
53 | for (; i < buf_len; i++) | ||
54 | num_zeros += (!buf[i]); | ||
55 | return num_zeros; | ||
56 | } | ||
57 | |||
58 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
59 | long time_diff_ns(struct timespec start, struct timespec stop) { | ||
60 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | ||
61 | } | ||
62 | |||
63 | int main(int argc, char **argv) { | ||
64 | struct timespec start, stop, stop2; | ||
65 | int iters, res; | ||
66 | |||
67 | if (argc != 2 || argv[1][0] == '-') { | ||
68 | fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]); | ||
69 | return 1; | ||
70 | } | ||
71 | iters = atoi(argv[1]); | ||
72 | |||
73 | // If output is redirected, add comment with source details | ||
74 | if (!isatty(fileno(stdout))) | ||
75 | fprintf(stdout, "# Generated by '%s %s'\n", argv[0], argv[1]); | ||
76 | |||
77 | // Open control device for use to reset between iterations any caches | ||
78 | // that the demand paging system might use. | ||
79 | int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY); | ||
80 | if (clear_fd == -1) { | ||
81 | perror("Unable to open /proc/sys/vm/drop_caches"); | ||
82 | return 1; | ||
83 | } | ||
84 | |||
85 | // Fill the area that we'll read from with random data | ||
86 | // (Direct I/O just used here for convenience.) | ||
87 | char *mem_in, *mem_out; // In and out of SSD | ||
88 | int fd = open(PAGED_FILE, O_RDWR | O_DIRECT | O_SYNC); | ||
89 | if (fd == -1) { | ||
90 | perror("Unable to open " PAGED_FILE); | ||
91 | return 1; | ||
92 | } | ||
93 | // Aligned malloc(GiB) basicially | ||
94 | res = posix_memalign((void**)&mem_in, 4096, GiB); | ||
95 | fill_rand(mem_in, GiB); | ||
96 | res = write(fd, mem_in, GiB); | ||
97 | if (res == -1) { | ||
98 | perror("Unable to write inital 1GiB random buffer to " PAGED_FILE); | ||
99 | return 1; | ||
100 | } | ||
101 | if (res != GiB) { | ||
102 | fprintf(stderr, "Unable to write the buffer all at once!"); | ||
103 | return 2; | ||
104 | } | ||
105 | free(mem_in); | ||
106 | close(fd); | ||
107 | |||
108 | // Output table header. One read sample per row. | ||
109 | printf("in (us)\n"); | ||
110 | // Perform iterations of demand paging in | ||
111 | for (int i = 0; i < iters; i++) { | ||
112 | int fd = open(PAGED_FILE, O_RDWR); | ||
113 | if (fd == -1) { | ||
114 | perror("Unable to open " PAGED_FILE); | ||
115 | return 1; | ||
116 | } | ||
117 | // Clear page cache | ||
118 | write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); | ||
119 | |||
120 | // Begin paging by mapping the file into (unbacked) virtual memory | ||
121 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); | ||
122 | mem_out = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0); | ||
123 | if (mem_out == MAP_FAILED) { | ||
124 | perror("Unable to mmap " PAGED_FILE); | ||
125 | return 1; | ||
126 | } | ||
127 | // Page fault in all the data via a sequential walk | ||
128 | res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer | ||
129 | assert(res > 0); | ||
130 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop); | ||
131 | |||
132 | // This benchmark attempts to capture data comparable to the | ||
133 | // GPU or Direct I/O paging benchmarks. As they don't have to | ||
134 | // perform a sequential walk, we emulate that here by measuring | ||
135 | // how long a sequential walk takes on its own, and subtract | ||
136 | // that from `stop - start`. | ||
137 | res = seq_walk(mem_out, GiB, 7); // Made up work to fool optimizer | ||
138 | assert(res > 0); | ||
139 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop2); | ||
140 | |||
141 | // `stop2 - stop` is how long just a sequential walk takes | ||
142 | long seq_walk_time = time_diff_ns(stop, stop2); | ||
143 | // `stop - start` is how long the faulting sequential walk took | ||
144 | long demand_paging_raw_duration = time_diff_ns(start, stop); | ||
145 | // `(stop - start) - (stop2 - stop)` is emulated duration | ||
146 | long demand_paging_duration = demand_paging_raw_duration - seq_walk_time; | ||
147 | printf("%ld\n", ns2us(demand_paging_duration)); | ||
148 | |||
149 | munmap(mem_out, GiB); | ||
150 | close(fd); | ||
151 | } | ||
152 | return 0; | ||
153 | } | ||
diff --git a/directio_paging_speed.c b/directio_paging_speed.c index b0a01d3..9dd6598 100644 --- a/directio_paging_speed.c +++ b/directio_paging_speed.c | |||
@@ -1,19 +1,34 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | * This program clocks how long it takes to read, and write, a 1GiB buffer via | ||
4 | * Linux direct I/O. | ||
5 | * | ||
6 | * More precisely, this program clocks: | ||
7 | * write(random_buffer); | ||
8 | * free(random_buffer); | ||
9 | * for writing data via direct I/O, and | ||
10 | * malloc(big_buffer); | ||
11 | * read(big_buffer); | ||
12 | * for reading in data via direct I/O. `random_buffer` is a preinitialized 1GiB | ||
13 | * buffer of random non-zero bytes. | ||
14 | */ | ||
1 | #define _GNU_SOURCE | 15 | #define _GNU_SOURCE |
2 | 16 | ||
3 | #include <sys/mman.h> | ||
4 | #include <sys/types.h> | ||
5 | #include <sys/stat.h> | ||
6 | #include <fcntl.h> | 17 | #include <fcntl.h> |
7 | #include <stdio.h> | ||
8 | #include <stdint.h> | 18 | #include <stdint.h> |
19 | #include <stdio.h> | ||
20 | #include <stdlib.h> | ||
21 | #include <sys/mman.h> | ||
22 | #include <sys/stat.h> | ||
23 | #include <sys/types.h> | ||
9 | #include <time.h> | 24 | #include <time.h> |
10 | #include <unistd.h> | 25 | #include <unistd.h> |
11 | #include <stdlib.h> | ||
12 | 26 | ||
13 | #define GiB 1024l*1024l*1024l | 27 | #define GiB 1024l*1024l*1024l |
14 | #define s2ns(s) ((s)*1000l*1000l*1000l) | 28 | #define s2ns(s) ((s)*1000l*1000l*1000l) |
15 | #define ns2us(ns) ((ns)/1000l) | 29 | #define ns2us(ns) ((ns)/1000l) |
16 | #define PAGED_FILE "/dev/nvme0n1" | 30 | #define PAGED_FILE "/dev/nvme0n1" |
31 | #define CLEAR_PAGECACHE_DENTRIES_INODES "3" | ||
17 | int max(int x, int y) {return x > y ? x : y;} | 32 | int max(int x, int y) {return x > y ? x : y;} |
18 | 33 | ||
19 | // Original function from copy_only.cu | 34 | // Original function from copy_only.cu |
@@ -31,16 +46,24 @@ uint64_t count_zero(char* buf, uint64_t buf_len) { | |||
31 | return num_zeros; | 46 | return num_zeros; |
32 | } | 47 | } |
33 | 48 | ||
49 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
34 | long time_diff_ns(struct timespec start, struct timespec stop) { | 50 | long time_diff_ns(struct timespec start, struct timespec stop) { |
35 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 51 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); |
36 | } | 52 | } |
37 | 53 | ||
38 | int main(int argc, char **argv) { | 54 | int main(int argc, char **argv) { |
39 | struct timespec out_start, out_stop, in_start, in_stop; | 55 | struct timespec out_start, out_stop, in_start, in_stop; |
40 | int iters = 1; | 56 | int iters, res; |
41 | int res; | 57 | |
42 | if (argc > 1) | 58 | if (argc != 2 || argv[1][0] == '-') { |
43 | iters = atoi(argv[1]); | 59 | fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]); |
60 | return 1; | ||
61 | } | ||
62 | iters = atoi(argv[1]); | ||
63 | |||
64 | // If output is redirected, add comment with source details | ||
65 | if (!isatty(fileno(stdout))) | ||
66 | fprintf(stdout, "# Generated by '%s %s'\n", argv[0], argv[1]); | ||
44 | 67 | ||
45 | // Needed to allow page cache clearing between iterations | 68 | // Needed to allow page cache clearing between iterations |
46 | // Note: Shouldn't be needed with O_DIRECT, but include it just in case | 69 | // Note: Shouldn't be needed with O_DIRECT, but include it just in case |
@@ -49,8 +72,8 @@ int main(int argc, char **argv) { | |||
49 | perror("Unable to open /proc/sys/vm/drop_caches"); | 72 | perror("Unable to open /proc/sys/vm/drop_caches"); |
50 | return 1; | 73 | return 1; |
51 | } | 74 | } |
52 | char clear_cmd = '3'; | ||
53 | 75 | ||
76 | // Print table header. One read, one write sample per following row | ||
54 | printf("out (us)\tin (us)\n"); | 77 | printf("out (us)\tin (us)\n"); |
55 | for (int i = 0; i < iters; i++) { | 78 | for (int i = 0; i < iters; i++) { |
56 | char *mem_in, *mem_out; | 79 | char *mem_in, *mem_out; |
@@ -60,7 +83,7 @@ int main(int argc, char **argv) { | |||
60 | return 1; | 83 | return 1; |
61 | } | 84 | } |
62 | // Clear page cache | 85 | // Clear page cache |
63 | write(clear_fd, &clear_cmd, 1); | 86 | write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); |
64 | // Allocate and fill a buffer with random data | 87 | // Allocate and fill a buffer with random data |
65 | // Aligned malloc(GiB) basicially | 88 | // Aligned malloc(GiB) basicially |
66 | res = posix_memalign((void**)&mem_in, 4096, GiB); | 89 | res = posix_memalign((void**)&mem_in, 4096, GiB); |
@@ -81,7 +104,7 @@ int main(int argc, char **argv) { | |||
81 | } | 104 | } |
82 | 105 | ||
83 | sleep(1); // Supposedly some other work would happen here | 106 | sleep(1); // Supposedly some other work would happen here |
84 | write(clear_fd, &clear_cmd, 1); // Just in case O_DIRECT misbehaves | 107 | write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); // Just in case O_DIRECT misbehaves |
85 | res = lseek(fd, 0, SEEK_SET); // Reposition offset | 108 | res = lseek(fd, 0, SEEK_SET); // Reposition offset |
86 | if (res == -1) { | 109 | if (res == -1) { |
87 | perror("Unable to seek to offset 0 in " PAGED_FILE); | 110 | perror("Unable to seek to offset 0 in " PAGED_FILE); |
diff --git a/gpu_paging_evil_task.cu b/gpu_paging_evil_task.cu index 7c1ab59..4b358a4 100644 --- a/gpu_paging_evil_task.cu +++ b/gpu_paging_evil_task.cu | |||
@@ -1,3 +1,9 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | * | ||
4 | * Run many iterations of GPU paging on the same CUDA context, and print the | ||
5 | * average time per iteration. | ||
6 | */ | ||
1 | #include <stdio.h> | 7 | #include <stdio.h> |
2 | #include <cuda.h> | 8 | #include <cuda.h> |
3 | #include <curand_kernel.h> // curandState_t and curand | 9 | #include <curand_kernel.h> // curandState_t and curand |
@@ -78,6 +84,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) { | |||
78 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); | 84 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); |
79 | } | 85 | } |
80 | 86 | ||
87 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
81 | long time_diff_ns(struct timespec start, struct timespec stop) { | 88 | long time_diff_ns(struct timespec start, struct timespec stop) { |
82 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 89 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); |
83 | } | 90 | } |
@@ -89,8 +96,8 @@ int main(int argc, char **argv) { | |||
89 | cudaStream_t stream1; | 96 | cudaStream_t stream1; |
90 | cudaError_t err; | 97 | cudaError_t err; |
91 | int iters; | 98 | int iters; |
92 | if (argc != 2) { | 99 | if (argc != 2 || argv[1][0] == '-') { |
93 | fprintf(stderr, "Usage: %s <iterations>\n", argv[0]); | 100 | fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]); |
94 | return 1; | 101 | return 1; |
95 | } | 102 | } |
96 | iters = atoi(argv[1]); | 103 | iters = atoi(argv[1]); |
diff --git a/gpu_paging_overhead_speed.cu b/gpu_paging_overhead_speed.cu index c7a0f3a..0cf364e 100644 --- a/gpu_paging_overhead_speed.cu +++ b/gpu_paging_overhead_speed.cu | |||
@@ -1,3 +1,6 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | */ | ||
1 | #include <stdio.h> | 4 | #include <stdio.h> |
2 | #include <cuda.h> | 5 | #include <cuda.h> |
3 | #include <curand_kernel.h> // curandState_t and curand | 6 | #include <curand_kernel.h> // curandState_t and curand |
@@ -78,6 +81,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) { | |||
78 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); | 81 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); |
79 | } | 82 | } |
80 | 83 | ||
84 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
81 | long time_diff_ns(struct timespec start, struct timespec stop) { | 85 | long time_diff_ns(struct timespec start, struct timespec stop) { |
82 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 86 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); |
83 | } | 87 | } |
diff --git a/gpu_paging_speed.cu b/gpu_paging_speed.cu index 72cb82e..bca02d9 100644 --- a/gpu_paging_speed.cu +++ b/gpu_paging_speed.cu | |||
@@ -1,3 +1,6 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | */ | ||
1 | #include <stdio.h> | 4 | #include <stdio.h> |
2 | #include <cuda.h> | 5 | #include <cuda.h> |
3 | #include <curand_kernel.h> // curandState_t and curand | 6 | #include <curand_kernel.h> // curandState_t and curand |
@@ -78,6 +81,7 @@ __global__ void count_zero(char* buf, uint64_t buf_len) { | |||
78 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); | 81 | atomicAdd_block((unsigned long long int*)&gpu_res, (unsigned long long int)num_zero); |
79 | } | 82 | } |
80 | 83 | ||
84 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
81 | long time_diff_ns(struct timespec start, struct timespec stop) { | 85 | long time_diff_ns(struct timespec start, struct timespec stop) { |
82 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 86 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); |
83 | } | 87 | } |
diff --git a/paging_speed.c b/paging_speed.c index 4ad56e2..7b0cd22 100644 --- a/paging_speed.c +++ b/paging_speed.c | |||
@@ -1,19 +1,37 @@ | |||
1 | /** | ||
2 | * Copyright 2022 Joshua Bakita | ||
3 | * This program clocks how long it takes to read in a 1Gib buffer via page | ||
4 | * faulting and via direct I/O, with efforts to match the overheads. | ||
5 | * | ||
6 | * More precisely, this program clocks: | ||
7 | * mmap(big_buffer); | ||
8 | * sequentially_walk(big_buffer); | ||
9 | * for demand paging, and: | ||
10 | * malloc(big_buffer); | ||
11 | * read(big_buffer); | ||
12 | * walk(big_buffer); | ||
13 | * for direct I/O. `big_buffer` is the arbitrary bytes contained in the first | ||
14 | * 1GiB of /dev/nvme0n1, and should be filled with random data prior to | ||
15 | * executing this microbenchmark. | ||
16 | */ | ||
1 | #define _GNU_SOURCE | 17 | #define _GNU_SOURCE |
2 | 18 | ||
3 | #include <sys/mman.h> | 19 | #include <assert.h> |
4 | #include <sys/types.h> | ||
5 | #include <sys/stat.h> | ||
6 | #include <fcntl.h> | 20 | #include <fcntl.h> |
7 | #include <stdio.h> | ||
8 | #include <stdint.h> | 21 | #include <stdint.h> |
22 | #include <stdio.h> | ||
23 | #include <stdlib.h> | ||
24 | #include <sys/mman.h> | ||
25 | #include <sys/stat.h> | ||
26 | #include <sys/types.h> | ||
9 | #include <time.h> | 27 | #include <time.h> |
10 | #include <unistd.h> | 28 | #include <unistd.h> |
11 | #include <stdlib.h> | ||
12 | #include <string.h> // strlen() | ||
13 | 29 | ||
14 | #define GiB 1024l*1024l*1024l | 30 | #define GiB 1024l*1024l*1024l |
15 | #define s2ns(s) ((s)*1000l*1000l*1000l) | 31 | #define s2ns(s) ((s)*1000l*1000l*1000l) |
16 | 32 | #define ns2us(ns) ((ns)/1000l) | |
33 | #define PAGED_FILE "/dev/nvme0n1" | ||
34 | #define CLEAR_PAGECACHE_DENTRIES_INODES "3" | ||
17 | 35 | ||
18 | int seq_walk(char* mem, int len, char to_find) { | 36 | int seq_walk(char* mem, int len, char to_find) { |
19 | int num_42 = 0; | 37 | int num_42 = 0; |
@@ -24,91 +42,82 @@ int seq_walk(char* mem, int len, char to_find) { | |||
24 | return num_42; | 42 | return num_42; |
25 | } | 43 | } |
26 | 44 | ||
45 | // Subtract first parameter from second parameter. Return as nanoseconds. | ||
27 | long time_diff_ns(struct timespec start, struct timespec stop) { | 46 | long time_diff_ns(struct timespec start, struct timespec stop) { |
28 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 47 | return (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); |
29 | } | 48 | } |
30 | // TODO: take *num_42, return time | ||
31 | |||
32 | //#define PAGED_FILE "/home/jbakita/1gib_random_f" | ||
33 | #define PAGED_FILE "/dev/nvme0n1" | ||
34 | 49 | ||
35 | int main(int argc, char **argv) { | 50 | int main(int argc, char **argv) { |
36 | int iters = 1; | 51 | struct timespec start, stop; |
37 | int no_seq = 0; | 52 | int iters, res; |
38 | if (argc > 1) | 53 | char* mem_out; |
39 | iters = atoi(argv[1]); | 54 | |
40 | if (argc > 2) { | 55 | if (argc != 2 || argv[1][0] == '-') { |
41 | no_seq = strncmp(argv[2], "--no-seq", strlen(argv[2])) ? 1 : 0; | 56 | fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]); |
42 | fprintf(stderr, "Skipping seq, but using no-seq emulation with demand paging\n"); | 57 | return 1; |
43 | } | 58 | } |
44 | struct timespec start, stop, seq_stop; | 59 | iters = atoi(argv[1]); |
60 | |||
61 | // Add comment with source details | ||
62 | fprintf(stdout, "# Generated by '%s %s'. Row 1 is demand paging, row 2 is direct I/O.\n", argv[0], argv[1]); | ||
63 | |||
64 | // Open control device for use to reset between iterations any caches | ||
65 | // that the demand paging system might use. | ||
45 | int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY); | 66 | int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY); |
46 | if (clear_fd == -1) { | 67 | if (clear_fd == -1) { |
47 | perror("Unable to open /proc/sys/vm/drop_caches"); | 68 | perror("Unable to open /proc/sys/vm/drop_caches"); |
48 | return 1; | 69 | return 1; |
49 | } | 70 | } |
50 | 71 | ||
51 | char clear_cmd = '3'; | 72 | // Perform iterations of demand paging in |
52 | for (int i = 0; i < iters; i++) { | 73 | for (int i = 0; i < iters; i++) { |
53 | int fd = open(PAGED_FILE, O_RDWR); | 74 | int fd = open(PAGED_FILE, O_RDWR); |
54 | if (fd == -1) { | 75 | if (fd == -1) { |
55 | perror("Unable to open " PAGED_FILE); | 76 | perror("Unable to open " PAGED_FILE); |
56 | return 1; | 77 | return 1; |
57 | } | 78 | } |
58 | // Clear page cache | 79 | // Clear page cache |
59 | write(clear_fd, &clear_cmd, 1); | 80 | write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); |
60 | // VIA MMAP | 81 | |
61 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); | 82 | // Begin paging by mapping the file into (unbacked) virtual memory |
62 | char* mem = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0); | 83 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); |
63 | if (mem == MAP_FAILED) { | 84 | mem_out = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0); |
64 | perror("Unable to mmap " PAGED_FILE); | 85 | if (mem_out == MAP_FAILED) { |
65 | return 1; | 86 | perror("Unable to mmap " PAGED_FILE); |
66 | } | 87 | return 1; |
67 | // Fault on all the pages via a sequential walk | 88 | } |
68 | int num_42 = seq_walk(mem, GiB, 42); | 89 | // Page fault in all the data via a sequential walk |
69 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop); | 90 | res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer |
70 | int num_52 = 0; | 91 | assert(res > 0); |
71 | if (no_seq) | 92 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop); |
72 | num_52 = seq_walk(mem, GiB, 52); | 93 | |
73 | clock_gettime(CLOCK_MONOTONIC_RAW, &seq_stop); | 94 | long duration = time_diff_ns(start, stop); |
74 | if (num_52) | 95 | printf("%ld, ", ns2us(duration)); |
75 | fprintf(stderr, "Something is seriously wrong! Found a 52 in a buffer that should be only 42s\n"); | 96 | |
76 | long duration = (s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec); | 97 | munmap(mem_out, GiB); |
77 | // Emulate the time demand paging would take if we didn't have to walk | ||
78 | if (no_seq) { | ||
79 | long seq_time = time_diff_ns(stop, seq_stop); | ||
80 | duration -= seq_time; | ||
81 | } | ||
82 | if (iters == 1) { | ||
83 | printf("Took %ldus via mmap\n", duration / 1000); | ||
84 | printf("Read %d 42s of %ld expected\n", num_42, GiB/4096); | ||
85 | } else { | ||
86 | printf("%ld, ", duration / 1000); | ||
87 | } | ||
88 | munmap(mem, GiB); | ||
89 | close(fd); | 98 | close(fd); |
90 | } | 99 | } |
91 | if (iters > 1) | 100 | printf("\n"); |
92 | printf("\n"); | ||
93 | 101 | ||
102 | // Perform iterations of paging in via direct I/O | ||
94 | for (int i = 0; i < iters; i++) { | 103 | for (int i = 0; i < iters; i++) { |
95 | char* mem; | ||
96 | int fd = open(PAGED_FILE, O_RDWR | O_DIRECT); | 104 | int fd = open(PAGED_FILE, O_RDWR | O_DIRECT); |
97 | if (fd == -1) { | 105 | if (fd == -1) { |
98 | perror("Unable to open " PAGED_FILE); | 106 | perror("Unable to open " PAGED_FILE); |
99 | return 1; | 107 | return 1; |
100 | } | 108 | } |
101 | // Clear page cache | 109 | // Clear page cache |
102 | write(clear_fd, &clear_cmd, 1); | 110 | write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1); |
103 | // VIA READ | 111 | |
112 | // Load in buffer via direct I/O | ||
104 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); | 113 | clock_gettime(CLOCK_MONOTONIC_RAW, &start); |
105 | // Aligned malloc(GiB) basicially | 114 | // Aligned malloc(GiB) basicially |
106 | int res = posix_memalign((void**)&mem, 4096, GiB); | 115 | res = posix_memalign((void**)&mem_out, 4096, GiB); |
107 | if (res) { | 116 | if (res) { |
108 | fprintf(stderr, "posix_memalign() failure. Error %d.", res); | 117 | fprintf(stderr, "posix_memalign() failure. Error %d.", res); |
109 | return 1; | 118 | return 1; |
110 | } | 119 | } |
111 | res = read(fd, mem, GiB); | 120 | res = read(fd, mem_out, GiB); |
112 | if (res == -1) { | 121 | if (res == -1) { |
113 | perror("Unable to read 1GiB from /dev/nvme0n1"); | 122 | perror("Unable to read 1GiB from /dev/nvme0n1"); |
114 | return 1; | 123 | return 1; |
@@ -117,21 +126,18 @@ int main(int argc, char **argv) { | |||
117 | fprintf(stderr, "Unable to read the buffer all at once!"); | 126 | fprintf(stderr, "Unable to read the buffer all at once!"); |
118 | return 2; | 127 | return 2; |
119 | } | 128 | } |
120 | int num_42 = 0; | 129 | // This sequential walk is not strictly necessary, but is |
121 | if (!no_seq) | 130 | // included to match the overheads of the demand paging path |
122 | num_42 = seq_walk(mem, GiB, 42); // Not strictly necessary, but to match mmap path overheads | 131 | res = seq_walk(mem_out, GiB, 42); // Made up work to fool optimizer |
132 | assert(res); | ||
123 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop); | 133 | clock_gettime(CLOCK_MONOTONIC_RAW, &stop); |
124 | if (iters == 1) { | 134 | |
125 | printf("Took %ldus via read\n", ((s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec)) / 1000); | 135 | long duration = time_diff_ns(start, stop); |
126 | if (!no_seq) | 136 | printf("%ld, ", ns2us(duration)); |
127 | printf("Read %d 42s of %ld expected\n", num_42, GiB/4096); | 137 | |
128 | } else { | ||
129 | printf("%ld, ", ((s2ns(stop.tv_sec) + stop.tv_nsec) - (s2ns(start.tv_sec) + start.tv_nsec)) / 1000); | ||
130 | } | ||
131 | close(fd); | 138 | close(fd); |
132 | free(mem); | 139 | free(mem_out); |
133 | } | 140 | } |
134 | if (iters > 1) | 141 | printf("\n"); |
135 | printf("\n"); | ||
136 | return 0; | 142 | return 0; |
137 | } | 143 | } |