Diffstat (limited to 'tools/io_uring/queue.c')
-rw-r--r--   tools/io_uring/queue.c   164
1 file changed, 164 insertions, 0 deletions
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
new file mode 100644
index 000000000000..88505e873ad9
--- /dev/null
+++ b/tools/io_uring/queue.c
@@ -0,0 +1,164 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#include "liburing.h"
#include "barrier.h"

static int __io_uring_get_completion(struct io_uring *ring,
				     struct io_uring_cqe **cqe_ptr, int wait)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned mask = *cq->kring_mask;
	unsigned head;
	int ret;

	*cqe_ptr = NULL;
	head = *cq->khead;
	do {
		/*
		 * It's necessary to use a read_barrier() before reading
		 * the CQ tail, since the kernel updates it locklessly. The
		 * kernel has the matching store barrier for the update. The
		 * kernel also ensures that previous stores to CQEs are ordered
		 * with the tail update.
		 */
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		if (!wait)
			break;
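		/*
		 * Nothing in the CQ ring yet; ask the kernel to wait until
		 * at least one completion is available before returning.
		 */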
		ret = io_uring_enter(ring->ring_fd, 0, 1,
					IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	if (*cqe_ptr) {
		*cq->khead = head + 1;
		/*
		 * Ensure that the kernel sees our new head, the kernel has
		 * the matching read barrier.
		 */
		write_barrier();
	}

	return 0;
}
53 | |||
54 | /* | ||
55 | * Return an IO completion, if one is readily available | ||
56 | */ | ||
57 | int io_uring_get_completion(struct io_uring *ring, | ||
58 | struct io_uring_cqe **cqe_ptr) | ||
59 | { | ||
60 | return __io_uring_get_completion(ring, cqe_ptr, 0); | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * Return an IO completion, waiting for it if necessary | ||
65 | */ | ||
66 | int io_uring_wait_completion(struct io_uring *ring, | ||
67 | struct io_uring_cqe **cqe_ptr) | ||
68 | { | ||
69 | return __io_uring_get_completion(ring, cqe_ptr, 1); | ||
70 | } | ||
71 | |||
/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted;
	int ret;

	/*
	 * If we have pending IO in the kring, submit it first. We need a
	 * read barrier here to match the kernel's store barrier when updating
	 * the SQ head.
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}

	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	while (sq->sqe_head < sq->sqe_tail) {
		ktail_next++;
		read_barrier();

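		/*
		 * The SQ ring proper holds indexes into the sqes[] array, so
		 * point the next ring slot at the sqe the application filled
		 * in via io_uring_get_sqe().
		 */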
		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail = ktail_next;

		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/*
		 * First write barrier ensures that the SQE stores are ordered
		 * with the tail update. This is needed so that the kernel
		 * will never see a tail update without the preceding SQE
		 * stores being done.
		 */
		write_barrier();
		*sq->ktail = ktail;
		/*
		 * The kernel has the matching read barrier for reading the
		 * SQ tail.
		 */
		write_barrier();
	}

submit:
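	/*
	 * Notify the kernel of the new entries; to_submit tells it how many
	 * sqes to consume from the SQ ring.
	 */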
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
				IORING_ENTER_GETEVENTS, NULL);
	if (ret < 0)
		return -errno;

	/* io_uring_enter() returns the number of sqes it consumed */
	return ret;
}
141 | |||
142 | /* | ||
143 | * Return an sqe to fill. Application must later call io_uring_submit() | ||
144 | * when it's ready to tell the kernel about it. The caller may call this | ||
145 | * function multiple times before calling io_uring_submit(). | ||
146 | * | ||
147 | * Returns a vacant sqe, or NULL if we're full. | ||
148 | */ | ||
149 | struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) | ||
150 | { | ||
151 | struct io_uring_sq *sq = &ring->sq; | ||
152 | unsigned next = sq->sqe_tail + 1; | ||
153 | struct io_uring_sqe *sqe; | ||
154 | |||
155 | /* | ||
156 | * All sqes are used | ||
157 | */ | ||
158 | if (next - sq->sqe_head > *sq->kring_entries) | ||
159 | return NULL; | ||
160 | |||
161 | sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask]; | ||
162 | sq->sqe_tail = next; | ||
163 | return sqe; | ||
164 | } | ||
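
The helpers above make up the whole submit/complete path of the userspace library: io_uring_get_sqe() hands out ring entries, io_uring_submit() publishes them to the kernel, and io_uring_wait_completion() reaps the results. The following is a minimal sketch of how an application might drive that cycle to read the first 4KB of a file; it assumes the io_uring_queue_init()/io_uring_queue_exit() helpers from the accompanying setup.c and the io_uring_prep_readv() helper from liburing.h, and keeps error handling brief.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

#include "liburing.h"

int main(int argc, char *argv[])
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct iovec iov;
	static char buf[4096];
	int fd, ret;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* mmap a small SQ/CQ ring pair (helper assumed from setup.c) */
	ret = io_uring_queue_init(4, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	/* Grab a vacant sqe and describe a single readv at offset 0 */
	sqe = io_uring_get_sqe(&ring);
	if (!sqe)
		return 1;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	io_uring_prep_readv(sqe, fd, &iov, 1, 0);

	/* Publish the queued sqe to the kernel SQ ring */
	ret = io_uring_submit(&ring);
	if (ret < 0) {
		fprintf(stderr, "submit: %s\n", strerror(-ret));
		return 1;
	}

	/* Block until the completion arrives, then inspect its result */
	ret = io_uring_wait_completion(&ring, &cqe);
	if (ret < 0 || !cqe) {
		fprintf(stderr, "wait_completion failed\n");
		return 1;
	}
	printf("readv returned %d\n", cqe->res);

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}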