diff options
| author | Paton J. Lewis <palewis@adobe.com> | 2012-10-04 20:13:39 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-05 14:05:00 -0400 |
| commit | 03a7beb55b9fad363f0dd33e72ccf2d3e1c2a406 (patch) | |
| tree | e89cb2a2db5645600f28699ebf3b4a98195a3fb3 /tools/testing | |
| parent | a0a0a7a94c765f7219b57fa3b79389901bb0bc99 (diff) | |
epoll: support for disabling items, and a self-test app
Enhanced epoll_ctl to support EPOLL_CTL_DISABLE, which disables an epoll
item. If epoll_ctl doesn't return -EBUSY in this case, it is then safe to
delete the epoll item in a multi-threaded environment. Also added a new
test_epoll self- test app to both demonstrate the need for this feature
and test it.
Signed-off-by: Paton J. Lewis <palewis@adobe.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Paul Holland <pholland@adobe.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'tools/testing')
| -rw-r--r-- | tools/testing/selftests/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/epoll/Makefile | 11 | ||||
| -rw-r--r-- | tools/testing/selftests/epoll/test_epoll.c | 344 |
3 files changed, 356 insertions, 1 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 85baf11e2acd..43480149119e 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug | 1 | TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll |
| 2 | 2 | ||
| 3 | all: | 3 | all: |
| 4 | for TARGET in $(TARGETS); do \ | 4 | for TARGET in $(TARGETS); do \ |
diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile new file mode 100644 index 000000000000..19806ed62f50 --- /dev/null +++ b/tools/testing/selftests/epoll/Makefile | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | # Makefile for epoll selftests | ||
| 2 | |||
| 3 | all: test_epoll | ||
| 4 | %: %.c | ||
| 5 | gcc -pthread -g -o $@ $^ | ||
| 6 | |||
| 7 | run_tests: all | ||
| 8 | ./test_epoll | ||
| 9 | |||
| 10 | clean: | ||
| 11 | $(RM) test_epoll | ||
diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c new file mode 100644 index 000000000000..e0fcff1e8331 --- /dev/null +++ b/tools/testing/selftests/epoll/test_epoll.c | |||
| @@ -0,0 +1,344 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/epoll/test_epoll.c | ||
| 3 | * | ||
| 4 | * Copyright 2012 Adobe Systems Incorporated | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | * | ||
| 11 | * Paton J. Lewis <palewis@adobe.com> | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <errno.h> | ||
| 16 | #include <fcntl.h> | ||
| 17 | #include <pthread.h> | ||
| 18 | #include <stdio.h> | ||
| 19 | #include <stdlib.h> | ||
| 20 | #include <unistd.h> | ||
| 21 | #include <sys/epoll.h> | ||
| 22 | #include <sys/socket.h> | ||
| 23 | |||
| 24 | /* | ||
| 25 | * A pointer to an epoll_item_private structure will be stored in the epoll | ||
| 26 | * item's event structure so that we can get access to the epoll_item_private | ||
| 27 | * data after calling epoll_wait: | ||
| 28 | */ | ||
| 29 | struct epoll_item_private { | ||
| 30 | int index; /* Position of this struct within the epoll_items array. */ | ||
| 31 | int fd; | ||
| 32 | uint32_t events; | ||
| 33 | pthread_mutex_t mutex; /* Guards the following variables... */ | ||
| 34 | int stop; | ||
| 35 | int status; /* Stores any error encountered while handling item. */ | ||
| 36 | /* The following variable allows us to test whether we have encountered | ||
| 37 | a problem while attempting to cancel and delete the associated | ||
| 38 | event. When the test program exits, 'deleted' should be exactly | ||
| 39 | one. If it is greater than one, then the failed test reflects a real | ||
| 40 | world situation where we would have tried to access the epoll item's | ||
| 41 | private data after deleting it: */ | ||
| 42 | int deleted; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct epoll_item_private *epoll_items; | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Delete the specified item from the epoll set. In a real-world secneario this | ||
| 49 | * is where we would free the associated data structure, but in this testing | ||
| 50 | * environment we retain the structure so that we can test for double-deletion: | ||
| 51 | */ | ||
| 52 | void delete_item(int index) | ||
| 53 | { | ||
| 54 | __sync_fetch_and_add(&epoll_items[index].deleted, 1); | ||
| 55 | } | ||
| 56 | |||
| 57 | /* | ||
| 58 | * A pointer to a read_thread_data structure will be passed as the argument to | ||
| 59 | * each read thread: | ||
| 60 | */ | ||
| 61 | struct read_thread_data { | ||
| 62 | int stop; | ||
| 63 | int status; /* Indicates any error encountered by the read thread. */ | ||
| 64 | int epoll_set; | ||
| 65 | }; | ||
| 66 | |||
| 67 | /* | ||
| 68 | * The function executed by the read threads: | ||
| 69 | */ | ||
| 70 | void *read_thread_function(void *function_data) | ||
| 71 | { | ||
| 72 | struct read_thread_data *thread_data = | ||
| 73 | (struct read_thread_data *)function_data; | ||
| 74 | struct epoll_event event_data; | ||
| 75 | struct epoll_item_private *item_data; | ||
| 76 | char socket_data; | ||
| 77 | |||
| 78 | /* Handle events until we encounter an error or this thread's 'stop' | ||
| 79 | condition is set: */ | ||
| 80 | while (1) { | ||
| 81 | int result = epoll_wait(thread_data->epoll_set, | ||
| 82 | &event_data, | ||
| 83 | 1, /* Number of desired events */ | ||
| 84 | 1000); /* Timeout in ms */ | ||
| 85 | if (result < 0) { | ||
| 86 | /* Breakpoints signal all threads. Ignore that while | ||
| 87 | debugging: */ | ||
| 88 | if (errno == EINTR) | ||
| 89 | continue; | ||
| 90 | thread_data->status = errno; | ||
| 91 | return 0; | ||
| 92 | } else if (thread_data->stop) | ||
| 93 | return 0; | ||
| 94 | else if (result == 0) /* Timeout */ | ||
| 95 | continue; | ||
| 96 | |||
| 97 | /* We need the mutex here because checking for the stop | ||
| 98 | condition and re-enabling the epoll item need to be done | ||
| 99 | together as one atomic operation when EPOLL_CTL_DISABLE is | ||
| 100 | available: */ | ||
| 101 | item_data = (struct epoll_item_private *)event_data.data.ptr; | ||
| 102 | pthread_mutex_lock(&item_data->mutex); | ||
| 103 | |||
| 104 | /* Remove the item from the epoll set if we want to stop | ||
| 105 | handling that event: */ | ||
| 106 | if (item_data->stop) | ||
| 107 | delete_item(item_data->index); | ||
| 108 | else { | ||
| 109 | /* Clear the data that was written to the other end of | ||
| 110 | our non-blocking socket: */ | ||
| 111 | do { | ||
| 112 | if (read(item_data->fd, &socket_data, 1) < 1) { | ||
| 113 | if ((errno == EAGAIN) || | ||
| 114 | (errno == EWOULDBLOCK)) | ||
| 115 | break; | ||
| 116 | else | ||
| 117 | goto error_unlock; | ||
| 118 | } | ||
| 119 | } while (item_data->events & EPOLLET); | ||
| 120 | |||
| 121 | /* The item was one-shot, so re-enable it: */ | ||
| 122 | event_data.events = item_data->events; | ||
| 123 | if (epoll_ctl(thread_data->epoll_set, | ||
| 124 | EPOLL_CTL_MOD, | ||
| 125 | item_data->fd, | ||
| 126 | &event_data) < 0) | ||
| 127 | goto error_unlock; | ||
| 128 | } | ||
| 129 | |||
| 130 | pthread_mutex_unlock(&item_data->mutex); | ||
| 131 | } | ||
| 132 | |||
| 133 | error_unlock: | ||
| 134 | thread_data->status = item_data->status = errno; | ||
| 135 | pthread_mutex_unlock(&item_data->mutex); | ||
| 136 | return 0; | ||
| 137 | } | ||
| 138 | |||
| 139 | /* | ||
| 140 | * A pointer to a write_thread_data structure will be passed as the argument to | ||
| 141 | * the write thread: | ||
| 142 | */ | ||
| 143 | struct write_thread_data { | ||
| 144 | int stop; | ||
| 145 | int status; /* Indicates any error encountered by the write thread. */ | ||
| 146 | int n_fds; | ||
| 147 | int *fds; | ||
| 148 | }; | ||
| 149 | |||
| 150 | /* | ||
| 151 | * The function executed by the write thread. It writes a single byte to each | ||
| 152 | * socket in turn until the stop condition for this thread is set. If writing to | ||
| 153 | * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for | ||
| 154 | * the moment and just move on to the next socket in the list. We don't care | ||
| 155 | * about the order in which we deliver events to the epoll set. In fact we don't | ||
| 156 | * care about the data we're writing to the pipes at all; we just want to | ||
| 157 | * trigger epoll events: | ||
| 158 | */ | ||
| 159 | void *write_thread_function(void *function_data) | ||
| 160 | { | ||
| 161 | const char data = 'X'; | ||
| 162 | int index; | ||
| 163 | struct write_thread_data *thread_data = | ||
| 164 | (struct write_thread_data *)function_data; | ||
| 165 | while (!write_thread_data->stop) | ||
| 166 | for (index = 0; | ||
| 167 | !thread_data->stop && (index < thread_data->n_fds); | ||
| 168 | ++index) | ||
| 169 | if ((write(thread_data->fds[index], &data, 1) < 1) && | ||
| 170 | (errno != EAGAIN) && | ||
| 171 | (errno != EWOULDBLOCK)) { | ||
| 172 | write_thread_data->status = errno; | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | /* | ||
| 178 | * Arguments are currently ignored: | ||
| 179 | */ | ||
| 180 | int main(int argc, char **argv) | ||
| 181 | { | ||
| 182 | const int n_read_threads = 100; | ||
| 183 | const int n_epoll_items = 500; | ||
| 184 | int index; | ||
| 185 | int epoll_set = epoll_create1(0); | ||
| 186 | struct write_thread_data write_thread_data = { | ||
| 187 | 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) | ||
| 188 | }; | ||
| 189 | struct read_thread_data *read_thread_data = | ||
| 190 | malloc(n_read_threads * sizeof(struct read_thread_data)); | ||
| 191 | pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); | ||
| 192 | pthread_t write_thread; | ||
| 193 | |||
| 194 | printf("-----------------\n"); | ||
| 195 | printf("Runing test_epoll\n"); | ||
| 196 | printf("-----------------\n"); | ||
| 197 | |||
| 198 | epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); | ||
| 199 | |||
| 200 | if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || | ||
| 201 | read_thread_data == 0 || read_threads == 0) | ||
| 202 | goto error; | ||
| 203 | |||
| 204 | if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { | ||
| 205 | printf("Error: please run this test on a multi-core system.\n"); | ||
| 206 | goto error; | ||
| 207 | } | ||
| 208 | |||
| 209 | /* Create the socket pairs and epoll items: */ | ||
| 210 | for (index = 0; index < n_epoll_items; ++index) { | ||
| 211 | int socket_pair[2]; | ||
| 212 | struct epoll_event event_data; | ||
| 213 | if (socketpair(AF_UNIX, | ||
| 214 | SOCK_STREAM | SOCK_NONBLOCK, | ||
| 215 | 0, | ||
| 216 | socket_pair) < 0) | ||
| 217 | goto error; | ||
| 218 | write_thread_data.fds[index] = socket_pair[0]; | ||
| 219 | epoll_items[index].index = index; | ||
| 220 | epoll_items[index].fd = socket_pair[1]; | ||
| 221 | if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) | ||
| 222 | goto error; | ||
| 223 | /* We always use EPOLLONESHOT because this test is currently | ||
| 224 | structured to demonstrate the need for EPOLL_CTL_DISABLE, | ||
| 225 | which only produces useful information in the EPOLLONESHOT | ||
| 226 | case (without EPOLLONESHOT, calling epoll_ctl with | ||
| 227 | EPOLL_CTL_DISABLE will never return EBUSY). If support for | ||
| 228 | testing events without EPOLLONESHOT is desired, it should | ||
| 229 | probably be implemented in a separate unit test. */ | ||
| 230 | epoll_items[index].events = EPOLLIN | EPOLLONESHOT; | ||
| 231 | if (index < n_epoll_items / 2) | ||
| 232 | epoll_items[index].events |= EPOLLET; | ||
| 233 | epoll_items[index].stop = 0; | ||
| 234 | epoll_items[index].status = 0; | ||
| 235 | epoll_items[index].deleted = 0; | ||
| 236 | event_data.events = epoll_items[index].events; | ||
| 237 | event_data.data.ptr = &epoll_items[index]; | ||
| 238 | if (epoll_ctl(epoll_set, | ||
| 239 | EPOLL_CTL_ADD, | ||
| 240 | epoll_items[index].fd, | ||
| 241 | &event_data) < 0) | ||
| 242 | goto error; | ||
| 243 | } | ||
| 244 | |||
| 245 | /* Create and start the read threads: */ | ||
| 246 | for (index = 0; index < n_read_threads; ++index) { | ||
| 247 | read_thread_data[index].stop = 0; | ||
| 248 | read_thread_data[index].status = 0; | ||
| 249 | read_thread_data[index].epoll_set = epoll_set; | ||
| 250 | if (pthread_create(&read_threads[index], | ||
| 251 | NULL, | ||
| 252 | read_thread_function, | ||
| 253 | &read_thread_data[index]) != 0) | ||
| 254 | goto error; | ||
| 255 | } | ||
| 256 | |||
| 257 | if (pthread_create(&write_thread, | ||
| 258 | NULL, | ||
| 259 | write_thread_function, | ||
| 260 | &write_thread_data) != 0) | ||
| 261 | goto error; | ||
| 262 | |||
| 263 | /* Cancel all event pollers: */ | ||
| 264 | #ifdef EPOLL_CTL_DISABLE | ||
| 265 | for (index = 0; index < n_epoll_items; ++index) { | ||
| 266 | pthread_mutex_lock(&epoll_items[index].mutex); | ||
| 267 | ++epoll_items[index].stop; | ||
| 268 | if (epoll_ctl(epoll_set, | ||
| 269 | EPOLL_CTL_DISABLE, | ||
| 270 | epoll_items[index].fd, | ||
| 271 | NULL) == 0) | ||
| 272 | delete_item(index); | ||
| 273 | else if (errno != EBUSY) { | ||
| 274 | pthread_mutex_unlock(&epoll_items[index].mutex); | ||
| 275 | goto error; | ||
| 276 | } | ||
| 277 | /* EBUSY means events were being handled; allow the other thread | ||
| 278 | to delete the item. */ | ||
| 279 | pthread_mutex_unlock(&epoll_items[index].mutex); | ||
| 280 | } | ||
| 281 | #else | ||
| 282 | for (index = 0; index < n_epoll_items; ++index) { | ||
| 283 | pthread_mutex_lock(&epoll_items[index].mutex); | ||
| 284 | ++epoll_items[index].stop; | ||
| 285 | pthread_mutex_unlock(&epoll_items[index].mutex); | ||
| 286 | /* Wait in case a thread running read_thread_function is | ||
| 287 | currently executing code between epoll_wait and | ||
| 288 | pthread_mutex_lock with this item. Note that a longer delay | ||
| 289 | would make double-deletion less likely (at the expense of | ||
| 290 | performance), but there is no guarantee that any delay would | ||
| 291 | ever be sufficient. Note also that we delete all event | ||
| 292 | pollers at once for testing purposes, but in a real-world | ||
| 293 | environment we are likely to want to be able to cancel event | ||
| 294 | pollers at arbitrary times. Therefore we can't improve this | ||
| 295 | situation by just splitting this loop into two loops | ||
| 296 | (i.e. signal 'stop' for all items, sleep, and then delete all | ||
| 297 | items). We also can't fix the problem via EPOLL_CTL_DEL | ||
| 298 | because that command can't prevent the case where some other | ||
| 299 | thread is executing read_thread_function within the region | ||
| 300 | mentioned above: */ | ||
| 301 | usleep(1); | ||
| 302 | pthread_mutex_lock(&epoll_items[index].mutex); | ||
| 303 | if (!epoll_items[index].deleted) | ||
| 304 | delete_item(index); | ||
| 305 | pthread_mutex_unlock(&epoll_items[index].mutex); | ||
| 306 | } | ||
| 307 | #endif | ||
| 308 | |||
| 309 | /* Shut down the read threads: */ | ||
| 310 | for (index = 0; index < n_read_threads; ++index) | ||
| 311 | __sync_fetch_and_add(&read_thread_data[index].stop, 1); | ||
| 312 | for (index = 0; index < n_read_threads; ++index) { | ||
| 313 | if (pthread_join(read_threads[index], NULL) != 0) | ||
| 314 | goto error; | ||
| 315 | if (read_thread_data[index].status) | ||
| 316 | goto error; | ||
| 317 | } | ||
| 318 | |||
| 319 | /* Shut down the write thread: */ | ||
| 320 | __sync_fetch_and_add(&write_thread_data.stop, 1); | ||
| 321 | if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) | ||
| 322 | goto error; | ||
| 323 | |||
| 324 | /* Check for final error conditions: */ | ||
| 325 | for (index = 0; index < n_epoll_items; ++index) { | ||
| 326 | if (epoll_items[index].status != 0) | ||
| 327 | goto error; | ||
| 328 | if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) | ||
| 329 | goto error; | ||
| 330 | } | ||
| 331 | for (index = 0; index < n_epoll_items; ++index) | ||
| 332 | if (epoll_items[index].deleted != 1) { | ||
| 333 | printf("Error: item data deleted %1d times.\n", | ||
| 334 | epoll_items[index].deleted); | ||
| 335 | goto error; | ||
| 336 | } | ||
| 337 | |||
| 338 | printf("[PASS]\n"); | ||
| 339 | return 0; | ||
| 340 | |||
| 341 | error: | ||
| 342 | printf("[FAIL]\n"); | ||
| 343 | return errno; | ||
| 344 | } | ||
