summaryrefslogtreecommitdiffstats
path: root/paging_speed.c
blob: 7b0cd22adf16ec3b278e1753ee4a61e8c5a4e4d6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
 * Copyright 2022 Joshua Bakita
 * This program clocks how long it takes to read in a 1GiB buffer via page
 * faulting and via direct I/O, with efforts to match the overheads.
 *
 * More precisely, this program clocks:
 *   mmap(big_buffer);
 *   sequentially_walk(big_buffer);
 * for demand paging, and:
 *   malloc(big_buffer);
 *   read(big_buffer);
 *   walk(big_buffer);
 * for direct I/O. `big_buffer` is the arbitrary bytes contained in the first
 * 1GiB of /dev/nvme0n1, and should be filled with random data prior to
 * executing this microbenchmark.
 */
#define _GNU_SOURCE

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

// Size of the benchmarked buffer: one gibibyte, as a `long`.
// Fully parenthesized so that it is safe on either side of any operator
// (e.g. `x / GiB` or `x % GiB`).
#define GiB (1024L*1024L*1024L)
// Convert whole seconds to nanoseconds.
#define s2ns(s) ((s)*1000L*1000L*1000L)
// Convert nanoseconds to whole microseconds (truncating).
#define ns2us(ns) ((ns)/1000L)
// Block device whose first 1GiB is read by both benchmark variants.
#define PAGED_FILE "/dev/nvme0n1"
// Value written to /proc/sys/vm/drop_caches between iterations.
#define CLEAR_PAGECACHE_DENTRIES_INODES "3"

/**
 * Sample one byte from every 4096-byte page of a buffer and count matches.
 *
 * main() uses this both to page-fault a mapping in and as made-up work whose
 * result must be consumed so the optimizer cannot drop the loads.
 *
 * @param mem     Start of the buffer to walk.
 * @param len     Length of the buffer in bytes; values <= 0 sample nothing.
 * @param to_find Byte value to count.
 * @return Number of sampled bytes (offsets 0, 4096, 8192, ... below `len`)
 *         that equal `to_find`.
 */
int seq_walk(char* mem, int len, char to_find) {
	// An unsigned, wider-than-int offset cannot overflow for any `int` length
	const size_t limit = len > 0 ? (size_t)len : 0;
	int matches = 0;

	// Stride of 4096 bytes (one 4k page), starting at offset 0 so that the
	// first page is sampled as well
	for (size_t off = 0; off < limit; off += 4096) {
		if (mem[off] == to_find) {
			matches++;
		}
	}
	return matches;
}

/**
 * Compute the time elapsed from `start` to `stop`.
 *
 * The seconds and nanoseconds fields are subtracted separately before being
 * combined, so intermediate values stay as small as the result itself rather
 * than scaling with the absolute clock reading.
 *
 * @param start Earlier timestamp.
 * @param stop  Later timestamp.
 * @return `stop - start` in nanoseconds (negative if `stop` precedes `start`).
 */
long time_diff_ns(struct timespec start, struct timespec stop) {
	const long ns_per_s = 1000L * 1000L * 1000L;
	const long whole_s = (long)(stop.tv_sec - start.tv_sec);
	const long frac_ns = stop.tv_nsec - start.tv_nsec;

	return whole_s * ns_per_s + frac_ns;
}

/**
 * Parse the iteration count given on the command line.
 *
 * @param arg   NUL-terminated argument text.
 * @param iters Receives the parsed count on success.
 * @return 0 on success; -1 if `arg` is not a complete base-10 integer in the
 *         range [0, INT_MAX].
 */
static int parse_iterations(const char *arg, int *iters) {
	char *end = NULL;

	errno = 0;
	long value = strtol(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE)
		return -1;
	if (value < 0 || value > INT_MAX)
		return -1;
	*iters = (int)value;
	return 0;
}

/**
 * Write the cache-clearing value to the already-open drop_caches control file.
 *
 * A failure is reported rather than ignored: a measurement taken with warm
 * caches would be meaningless.
 *
 * @param clear_fd Descriptor for /proc/sys/vm/drop_caches, open for writing.
 * @return 0 on success, -1 on failure (a message has been printed).
 */
static int drop_page_cache(int clear_fd) {
	ssize_t written = write(clear_fd, CLEAR_PAGECACHE_DENTRIES_INODES, 1);

	if (written == 1)
		return 0;
	if (written == -1)
		perror("Unable to write to /proc/sys/vm/drop_caches");
	else
		fprintf(stderr, "Short write to /proc/sys/vm/drop_caches\n");
	return -1;
}

/**
 * Time one demand-paging pass: mmap() the first 1GiB of PAGED_FILE and walk it.
 *
 * The timed region covers mmap(), the walk, and the check of the walk result.
 *
 * @param clear_fd   Descriptor for /proc/sys/vm/drop_caches.
 * @param elapsed_us Receives the duration in microseconds on success.
 * @return 0 on success, 1 on a system-call failure, 3 if the walk found no
 *         matching byte.
 */
static int time_demand_paging(int clear_fd, long *elapsed_us) {
	struct timespec start, stop;
	char *mem;
	int hits;
	int status = 1;

	// The benchmark only reads, so the device is never opened for writing
	int fd = open(PAGED_FILE, O_RDONLY);
	if (fd == -1) {
		perror("Unable to open " PAGED_FILE);
		return 1;
	}
	// Clear page cache
	if (drop_page_cache(clear_fd) != 0)
		goto out_close;

	// Begin paging by mapping the file into (unbacked) virtual memory
	clock_gettime(CLOCK_MONOTONIC_RAW, &start);
	mem = mmap(NULL, GiB, PROT_READ, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		perror("Unable to mmap " PAGED_FILE);
		goto out_close;
	}
	// Page fault in all the data via a sequential walk. The result is
	// checked at run time (not via assert()) so that the walk cannot be
	// optimized away when NDEBUG is defined.
	hits = seq_walk(mem, GiB, 42);
	if (hits <= 0) {
		fprintf(stderr, "No sampled byte of " PAGED_FILE " equals 42; fill it with random data first.\n");
		status = 3;
		goto out_unmap;
	}
	clock_gettime(CLOCK_MONOTONIC_RAW, &stop);

	*elapsed_us = ns2us(time_diff_ns(start, stop));
	status = 0;

out_unmap:
	munmap(mem, GiB);
out_close:
	close(fd);
	return status;
}

/**
 * Time one direct-I/O pass: allocate a 1GiB buffer, read() into it, walk it.
 *
 * The timed region covers the allocation, the read, the walk, and the check
 * of the walk result.
 *
 * @param clear_fd   Descriptor for /proc/sys/vm/drop_caches.
 * @param elapsed_us Receives the duration in microseconds on success.
 * @return 0 on success, 1 on a system-call or allocation failure, 2 if the
 *         read returned fewer than 1GiB, 3 if the walk found no matching byte.
 */
static int time_direct_io(int clear_fd, long *elapsed_us) {
	struct timespec start, stop;
	void *buf = NULL;
	ssize_t got;
	int err, hits;
	int status = 1;

	// The benchmark only reads, so the device is never opened for writing
	int fd = open(PAGED_FILE, O_RDONLY | O_DIRECT);
	if (fd == -1) {
		perror("Unable to open " PAGED_FILE);
		return 1;
	}
	// Clear page cache
	if (drop_page_cache(clear_fd) != 0)
		goto out_close;

	// Load in buffer via direct I/O
	clock_gettime(CLOCK_MONOTONIC_RAW, &start);
	// Basically an aligned malloc(GiB)
	err = posix_memalign(&buf, 4096, GiB);
	if (err) {
		fprintf(stderr, "posix_memalign() failure. Error %d.\n", err);
		goto out_close;
	}
	got = read(fd, buf, GiB);
	if (got == -1) {
		perror("Unable to read 1GiB from " PAGED_FILE);
		goto out_free;
	}
	if (got < GiB) {
		fprintf(stderr, "Unable to read the buffer all at once!\n");
		status = 2;
		goto out_free;
	}
	// This sequential walk is not strictly necessary, but is
	// included to match the overheads of the demand paging path.
	// As there, the result is checked at run time so the walk survives
	// builds with NDEBUG defined.
	hits = seq_walk(buf, GiB, 42);
	if (hits <= 0) {
		fprintf(stderr, "No sampled byte of " PAGED_FILE " equals 42; fill it with random data first.\n");
		status = 3;
		goto out_free;
	}
	clock_gettime(CLOCK_MONOTONIC_RAW, &stop);

	*elapsed_us = ns2us(time_diff_ns(start, stop));
	status = 0;

out_free:
	free(buf);
out_close:
	close(fd);
	return status;
}

/**
 * Run `measure` `iters` times and print the durations as one output row.
 *
 * Each duration is printed as "<microseconds>, "; the row is terminated with
 * a newline once all iterations have succeeded.
 *
 * @return 0 on success, otherwise the first non-zero status from `measure`.
 */
static int run_row(int clear_fd, int iters, int (*measure)(int, long *)) {
	for (int i = 0; i < iters; i++) {
		long elapsed_us = 0;
		int status = measure(clear_fd, &elapsed_us);
		if (status != 0)
			return status;
		printf("%ld, ", elapsed_us);
	}
	printf("\n");
	return 0;
}

/**
 * Entry point: `<program> <number of iterations>`.
 *
 * Prints a comment line describing the run, then one row of demand-paging
 * timings and one row of direct-I/O timings, all in microseconds.
 *
 * @return 0 on success, 1 on usage or system-call errors, 2 if a direct-I/O
 *         read came back short, 3 if a walk found no byte equal to 42.
 */
int main(int argc, char **argv) {
	int iters = 0;
	int status;

	if (argc != 2 || argv[1][0] == '-' || parse_iterations(argv[1], &iters) != 0) {
		fprintf(stderr, "Usage: %s <number of iterations>\n", argv[0]);
		return 1;
	}

	// Add comment with source details
	fprintf(stdout, "# Generated by '%s %s'. Row 1 is demand paging, row 2 is direct I/O.\n", argv[0], argv[1]);

	// Open control device for use to reset between iterations any caches
	// that the demand paging system might use.
	int clear_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
	if (clear_fd == -1) {
		perror("Unable to open /proc/sys/vm/drop_caches");
		return 1;
	}

	// Perform iterations of demand paging in
	status = run_row(clear_fd, iters, time_demand_paging);
	// Perform iterations of paging in via direct I/O
	if (status == 0)
		status = run_row(clear_fd, iters, time_direct_io);

	close(clear_fd);
	return status;
}