author    Anton Ivanov <anton.ivanov@cambridgegreys.com>  2017-11-20 16:17:58 -0500
committer Richard Weinberger <richard@nod.at>             2018-02-19 13:38:51 -0500
commit    ff6a17989c08b0bb0fd490cc500b084581b3a9b9 (patch)
tree      1f87a8f21ad3659e61e4958faece57994a842cde /arch/um/kernel/irq.c
parent    4d1a535b8ec5e74b42dfd9dc809142653b2597f6 (diff)
Epoll based IRQ controller
1. Removes the need to walk the IRQ/device list to determine who triggered the IRQ.
2. Improves scalability (up to several times performance improvement for cases with tens of devices).
3. Improves UML baseline IO performance for the one disk + one NIC use case by up to 10%.
4. Introduces write-poll-triggered IRQs.
5. Prerequisite for introducing the high-performance mmsg family of functions in network IO.
6. Fixes RNG shutdown, which was leaking a file descriptor.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
Diffstat (limited to 'arch/um/kernel/irq.c')
-rw-r--r--  arch/um/kernel/irq.c | 460
1 file changed, 297 insertions(+), 163 deletions(-)
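The dispatch rework in sigio_handler() below hinges on epoll handing back a per-fd cookie, so the wakeup path no longer has to walk a locked IRQ list. A minimal userspace sketch of that back-reference pattern, using only the plain epoll(7) API (struct fd_entry, IRQ_TYPES and dispatch() are illustrative names, not the kernel's os_* wrappers):

/* Each fd is registered once; a pointer to its per-fd entry is stored in
 * epoll_event.data.ptr, so a wakeup goes straight to the right handlers
 * without any list walk or lock.  Illustrative only - not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>

#define IRQ_TYPES 3                     /* read, write, none - mirrors MAX_IRQ_TYPE */

struct fd_entry {
	int fd;
	void *handler[IRQ_TYPES];       /* one slot per IRQ type, like irq_array[] */
};

static void dispatch(struct fd_entry *e, unsigned int events)
{
	if (events & EPOLLIN)
		printf("read IRQ for fd %d\n", e->fd);
	if (events & EPOLLOUT)
		printf("write IRQ for fd %d\n", e->fd);
}

int main(void)
{
	int epfd = epoll_create1(0);
	struct fd_entry entry = { .fd = 0 };    /* watch stdin as an example */
	struct epoll_event ev = { .events = EPOLLIN, .data.ptr = &entry };
	struct epoll_event hit[16];
	int i, n;

	if (epfd < 0 || epoll_ctl(epfd, EPOLL_CTL_ADD, entry.fd, &ev) < 0)
		return EXIT_FAILURE;

	n = epoll_wait(epfd, hit, 16, -1);
	for (i = 0; i < n; i++)                 /* data.ptr is the back-reference */
		dispatch(hit[i].data.ptr, hit[i].events);
	return 0;
}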
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..980148d56537 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
1/* 1/*
2 * Copyright (C) 2017 - Cambridge Greys Ltd
3 * Copyright (C) 2011 - 2014 Cisco Systems Inc
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) 4 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL 5 * Licensed under the GPL
4 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: 6 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,361 @@
16#include <as-layout.h> 18#include <as-layout.h>
17#include <kern_util.h> 19#include <kern_util.h>
18#include <os.h> 20#include <os.h>
21#include <irq_user.h>
19 22
20/* 23
21 * This list is accessed under irq_lock, except in sigio_handler, 24/* When epoll triggers we do not know why it did so
22 * where it is safe from being modified. IRQ handlers won't change it - 25 * we can also have different IRQs for read and write.
23 * if an IRQ source has vanished, it will be freed by free_irqs just 26 * This is why we keep a small irq_fd array for each fd -
24 * before returning from sigio_handler. That will process a separate 27 * one entry per IRQ type
25 * list of irqs to free, with its own locking, coming back here to
26 * remove list elements, taking the irq_lock to do so.
27 */ 28 */
28static struct irq_fd *active_fds = NULL;
29static struct irq_fd **last_irq_ptr = &active_fds;
30 29
31extern void free_irqs(void); 30struct irq_entry {
31 struct irq_entry *next;
32 int fd;
33 struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
34};
35
36static struct irq_entry *active_fds;
37
38static DEFINE_SPINLOCK(irq_lock);
39
40static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
41{
42/*
43 * irq->active guards against reentry
44 * irq->pending accumulates pending requests
45 * if pending is raised the irq_handler is re-run
46 * until pending is cleared
47 */
48 if (irq->active) {
49 irq->active = false;
50 do {
51 irq->pending = false;
52 do_IRQ(irq->irq, regs);
53 } while (irq->pending && (!irq->purge));
54 if (!irq->purge)
55 irq->active = true;
56 } else {
57 irq->pending = true;
58 }
59}
32 60
33void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) 61void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
34{ 62{
35 struct irq_fd *irq_fd; 63 struct irq_entry *irq_entry;
36 int n; 64 struct irq_fd *irq;
65
66 int n, i, j;
37 67
38 while (1) { 68 while (1) {
39 n = os_waiting_for_events(active_fds); 69 /* This is now lockless - epoll keeps back-references to the irqs
70 * which have triggered it so there is no need to walk the irq
71 * list and lock it every time. We avoid locking by turning off
72 * IO for a specific fd by executing os_del_epoll_fd(fd) before
73 * we do any changes to the actual data structures
74 */
75 n = os_waiting_for_events_epoll();
76
40 if (n <= 0) { 77 if (n <= 0) {
41 if (n == -EINTR) 78 if (n == -EINTR)
42 continue; 79 continue;
43 else break; 80 else
81 break;
44 } 82 }
45 83
46 for (irq_fd = active_fds; irq_fd != NULL; 84 for (i = 0; i < n ; i++) {
47 irq_fd = irq_fd->next) { 85 /* Epoll back reference is the entry with 3 irq_fd
48 if (irq_fd->current_events != 0) { 86 * leaves - one for each irq type.
49 irq_fd->current_events = 0; 87 */
50 do_IRQ(irq_fd->irq, regs); 88 irq_entry = (struct irq_entry *)
89 os_epoll_get_data_pointer(i);
90 for (j = 0; j < MAX_IRQ_TYPE ; j++) {
91 irq = irq_entry->irq_array[j];
92 if (irq == NULL)
93 continue;
94 if (os_epoll_triggered(i, irq->events) > 0)
95 irq_io_loop(irq, regs);
96 if (irq->purge) {
97 irq_entry->irq_array[j] = NULL;
98 kfree(irq);
99 }
51 } 100 }
52 } 101 }
53 } 102 }
103}
104
105static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
106{
107 int i;
108 int events = 0;
109 struct irq_fd *irq;
54 110
55 free_irqs(); 111 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
112 irq = irq_entry->irq_array[i];
113 if (irq != NULL)
114 events = irq->events | events;
115 }
116 if (events > 0) {
117 /* os_add_epoll will call os_mod_epoll if this already exists */
118 return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
119 }
120 /* No events - delete */
121 return os_del_epoll_fd(irq_entry->fd);
56} 122}
57 123
58static DEFINE_SPINLOCK(irq_lock); 124
59 125
60static int activate_fd(int irq, int fd, int type, void *dev_id) 126static int activate_fd(int irq, int fd, int type, void *dev_id)
61{ 127{
62 struct pollfd *tmp_pfd; 128 struct irq_fd *new_fd;
63 struct irq_fd *new_fd, *irq_fd; 129 struct irq_entry *irq_entry;
130 int i, err, events;
64 unsigned long flags; 131 unsigned long flags;
65 int events, err, n;
66 132
67 err = os_set_fd_async(fd); 133 err = os_set_fd_async(fd);
68 if (err < 0) 134 if (err < 0)
69 goto out; 135 goto out;
70 136
71 err = -ENOMEM; 137 spin_lock_irqsave(&irq_lock, flags);
72 new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
73 if (new_fd == NULL)
74 goto out;
75 138
76 if (type == IRQ_READ) 139 /* Check if we have an entry for this fd */
77 events = UM_POLLIN | UM_POLLPRI;
78 else events = UM_POLLOUT;
79 *new_fd = ((struct irq_fd) { .next = NULL,
80 .id = dev_id,
81 .fd = fd,
82 .type = type,
83 .irq = irq,
84 .events = events,
85 .current_events = 0 } );
86 140
87 err = -EBUSY; 141 err = -EBUSY;
88 spin_lock_irqsave(&irq_lock, flags); 142 for (irq_entry = active_fds;
89 for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { 143 irq_entry != NULL; irq_entry = irq_entry->next) {
90 if ((irq_fd->fd == fd) && (irq_fd->type == type)) { 144 if (irq_entry->fd == fd)
91 printk(KERN_ERR "Registering fd %d twice\n", fd); 145 break;
92 printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); 146 }
93 printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, 147
94 dev_id); 148 if (irq_entry == NULL) {
149 /* This needs to be atomic as it may be called from an
150 * IRQ context.
151 */
152 irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
153 if (irq_entry == NULL) {
154 printk(KERN_ERR
155 "Failed to allocate new IRQ entry\n");
95 goto out_unlock; 156 goto out_unlock;
96 } 157 }
158 irq_entry->fd = fd;
159 for (i = 0; i < MAX_IRQ_TYPE; i++)
160 irq_entry->irq_array[i] = NULL;
161 irq_entry->next = active_fds;
162 active_fds = irq_entry;
97 } 163 }
98 164
99 if (type == IRQ_WRITE) 165 /* Check if we are trying to re-register an interrupt for a
100 fd = -1; 166 * particular fd
101 167 */
102 tmp_pfd = NULL;
103 n = 0;
104 168
105 while (1) { 169 if (irq_entry->irq_array[type] != NULL) {
106 n = os_create_pollfd(fd, events, tmp_pfd, n); 170 printk(KERN_ERR
107 if (n == 0) 171 "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
108 break; 172 irq, fd, type, dev_id
173 );
174 goto out_unlock;
175 } else {
176 /* New entry for this fd */
177
178 err = -ENOMEM;
179 new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
180 if (new_fd == NULL)
181 goto out_unlock;
109 182
110 /* 183 events = os_event_mask(type);
111 * n > 0 184
112 * It means we couldn't put new pollfd to current pollfds 185 *new_fd = ((struct irq_fd) {
113 * and tmp_fds is NULL or too small for new pollfds array. 186 .id = dev_id,
114 * Needed size is equal to n as minimum. 187 .irq = irq,
115 * 188 .type = type,
116 * Here we have to drop the lock in order to call 189 .events = events,
117 * kmalloc, which might sleep. 190 .active = true,
118 * If something else came in and changed the pollfds array 191 .pending = false,
119 * so we will not be able to put new pollfd struct to pollfds 192 .purge = false
120 * then we free the buffer tmp_fds and try again. 193 });
194 /* Turn off any IO on this fd - allows us to
195 * avoid locking the IRQ loop
121 */ 196 */
122 spin_unlock_irqrestore(&irq_lock, flags); 197 os_del_epoll_fd(irq_entry->fd);
123 kfree(tmp_pfd); 198 irq_entry->irq_array[type] = new_fd;
124
125 tmp_pfd = kmalloc(n, GFP_KERNEL);
126 if (tmp_pfd == NULL)
127 goto out_kfree;
128
129 spin_lock_irqsave(&irq_lock, flags);
130 } 199 }
131 200
132 *last_irq_ptr = new_fd; 201 /* Turn back IO on with the correct (new) IO event mask */
133 last_irq_ptr = &new_fd->next; 202 assign_epoll_events_to_irq(irq_entry);
134
135 spin_unlock_irqrestore(&irq_lock, flags); 203 spin_unlock_irqrestore(&irq_lock, flags);
136 204 maybe_sigio_broken(fd, (type != IRQ_NONE));
137 /*
138 * This calls activate_fd, so it has to be outside the critical
139 * section.
140 */
141 maybe_sigio_broken(fd, (type == IRQ_READ));
142 205
143 return 0; 206 return 0;
144 207out_unlock:
145 out_unlock:
146 spin_unlock_irqrestore(&irq_lock, flags); 208 spin_unlock_irqrestore(&irq_lock, flags);
147 out_kfree: 209out:
148 kfree(new_fd);
149 out:
150 return err; 210 return err;
151} 211}
152 212
153static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) 213/*
214 * Walk the IRQ list and dispose of any unused entries.
215 * Should be done under irq_lock.
216 */
217
218static void garbage_collect_irq_entries(void)
154{ 219{
155 unsigned long flags; 220 int i;
221 bool reap;
222 struct irq_entry *walk;
223 struct irq_entry *previous = NULL;
224 struct irq_entry *to_free;
156 225
157 spin_lock_irqsave(&irq_lock, flags); 226 if (active_fds == NULL)
158 os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); 227 return;
159 spin_unlock_irqrestore(&irq_lock, flags); 228 walk = active_fds;
229 while (walk != NULL) {
230 reap = true;
231 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
232 if (walk->irq_array[i] != NULL) {
233 reap = false;
234 break;
235 }
236 }
237 if (reap) {
238 if (previous == NULL)
239 active_fds = walk->next;
240 else
241 previous->next = walk->next;
242 to_free = walk;
243 } else {
244 to_free = NULL;
245 }
246 walk = walk->next;
247 if (to_free != NULL)
248 kfree(to_free);
249 }
160} 250}
161 251
162struct irq_and_dev { 252/*
163 int irq; 253 * Walk the IRQ list and get the descriptor for our FD
164 void *dev; 254 */
165};
166 255
167static int same_irq_and_dev(struct irq_fd *irq, void *d) 256static struct irq_entry *get_irq_entry_by_fd(int fd)
168{ 257{
169 struct irq_and_dev *data = d; 258 struct irq_entry *walk = active_fds;
170 259
171 return ((irq->irq == data->irq) && (irq->id == data->dev)); 260 while (walk != NULL) {
261 if (walk->fd == fd)
262 return walk;
263 walk = walk->next;
264 }
265 return NULL;
172} 266}
173 267
174static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
175{
176 struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq,
177 .dev = dev });
178 268
179 free_irq_by_cb(same_irq_and_dev, &data); 269/*
180} 270 * Walk the IRQ list and dispose of an entry for a specific
271 * device, fd and number. Note - if sharing an IRQ for read
272 * and write for the same FD it will be disposed of in either case.
273 * If this behaviour is undesirable use different IRQ ids.
274 */
181 275
182static int same_fd(struct irq_fd *irq, void *fd) 276#define IGNORE_IRQ 1
183{ 277#define IGNORE_DEV (1<<1)
184 return (irq->fd == *((int *)fd));
185}
186 278
187void free_irq_by_fd(int fd) 279static void do_free_by_irq_and_dev(
280 struct irq_entry *irq_entry,
281 unsigned int irq,
282 void *dev,
283 int flags
284)
188{ 285{
189 free_irq_by_cb(same_fd, &fd); 286 int i;
287 struct irq_fd *to_free;
288
289 for (i = 0; i < MAX_IRQ_TYPE ; i++) {
290 if (irq_entry->irq_array[i] != NULL) {
291 if (
292 ((flags & IGNORE_IRQ) ||
293 (irq_entry->irq_array[i]->irq == irq)) &&
294 ((flags & IGNORE_DEV) ||
295 (irq_entry->irq_array[i]->id == dev))
296 ) {
297 /* Turn off any IO on this fd - allows us to
298 * avoid locking the IRQ loop
299 */
300 os_del_epoll_fd(irq_entry->fd);
301 to_free = irq_entry->irq_array[i];
302 irq_entry->irq_array[i] = NULL;
303 assign_epoll_events_to_irq(irq_entry);
304 if (to_free->active)
305 to_free->purge = true;
306 else
307 kfree(to_free);
308 }
309 }
310 }
190} 311}
191 312
192/* Must be called with irq_lock held */ 313void free_irq_by_fd(int fd)
193static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
194{ 314{
195 struct irq_fd *irq; 315 struct irq_entry *to_free;
196 int i = 0; 316 unsigned long flags;
197 int fdi;
198 317
199 for (irq = active_fds; irq != NULL; irq = irq->next) { 318 spin_lock_irqsave(&irq_lock, flags);
200 if ((irq->fd == fd) && (irq->irq == irqnum)) 319 to_free = get_irq_entry_by_fd(fd);
201 break; 320 if (to_free != NULL) {
202 i++; 321 do_free_by_irq_and_dev(
203 } 322 to_free,
204 if (irq == NULL) { 323 -1,
205 printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n", 324 NULL,
206 fd); 325 IGNORE_IRQ | IGNORE_DEV
207 goto out; 326 );
208 }
209 fdi = os_get_pollfd(i);
210 if ((fdi != -1) && (fdi != fd)) {
211 printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
212 "and pollfds, fd %d vs %d, need %d\n", irq->fd,
213 fdi, fd);
214 irq = NULL;
215 goto out;
216 } 327 }
217 *index_out = i; 328 garbage_collect_irq_entries();
218 out: 329 spin_unlock_irqrestore(&irq_lock, flags);
219 return irq;
220} 330}
221 331
222void reactivate_fd(int fd, int irqnum) 332static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
223{ 333{
224 struct irq_fd *irq; 334 struct irq_entry *to_free;
225 unsigned long flags; 335 unsigned long flags;
226 int i;
227 336
228 spin_lock_irqsave(&irq_lock, flags); 337 spin_lock_irqsave(&irq_lock, flags);
229 irq = find_irq_by_fd(fd, irqnum, &i); 338 to_free = active_fds;
230 if (irq == NULL) { 339 while (to_free != NULL) {
231 spin_unlock_irqrestore(&irq_lock, flags); 340 do_free_by_irq_and_dev(
232 return; 341 to_free,
342 irq,
343 dev,
344 0
345 );
346 to_free = to_free->next;
233 } 347 }
234 os_set_pollfd(i, irq->fd); 348 garbage_collect_irq_entries();
235 spin_unlock_irqrestore(&irq_lock, flags); 349 spin_unlock_irqrestore(&irq_lock, flags);
350}
236 351
237 add_sigio_fd(fd); 352
353void reactivate_fd(int fd, int irqnum)
354{
355 /** NOP - we do auto-EOI now **/
238} 356}
239 357
240void deactivate_fd(int fd, int irqnum) 358void deactivate_fd(int fd, int irqnum)
241{ 359{
242 struct irq_fd *irq; 360 struct irq_entry *to_free;
243 unsigned long flags; 361 unsigned long flags;
244 int i;
245 362
363 os_del_epoll_fd(fd);
246 spin_lock_irqsave(&irq_lock, flags); 364 spin_lock_irqsave(&irq_lock, flags);
247 irq = find_irq_by_fd(fd, irqnum, &i); 365 to_free = get_irq_entry_by_fd(fd);
248 if (irq == NULL) { 366 if (to_free != NULL) {
249 spin_unlock_irqrestore(&irq_lock, flags); 367 do_free_by_irq_and_dev(
250 return; 368 to_free,
369 irqnum,
370 NULL,
371 IGNORE_DEV
372 );
251 } 373 }
252 374 garbage_collect_irq_entries();
253 os_set_pollfd(i, -1);
254 spin_unlock_irqrestore(&irq_lock, flags); 375 spin_unlock_irqrestore(&irq_lock, flags);
255
256 ignore_sigio_fd(fd); 376 ignore_sigio_fd(fd);
257} 377}
258EXPORT_SYMBOL(deactivate_fd); 378EXPORT_SYMBOL(deactivate_fd);
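The activate_fd() path above keeps one irq_fd slot per IRQ type for each file descriptor, which is what lets a single fd carry both a read and a write interrupt. A hypothetical driver fragment consuming that, assuming the existing um_request_irq() helper keeps its usual (irq, fd, type, handler, flags, name, dev_id) signature; the device structure, IRQ numbers and handlers are made-up names:

/* Not part of this patch - illustrative driver-side usage only. */
static int mydev_enable_irqs(struct mydev *dev, int fd)
{
	int err;

	/* Read side: ends up as irq_array[IRQ_READ] for this fd. */
	err = um_request_irq(MYDEV_RX_IRQ, fd, IRQ_READ,
			     mydev_rx_interrupt, IRQF_SHARED,
			     "mydev-rx", dev);
	if (err)
		return err;

	/* Write side: write-poll IRQs are what item 4 of the commit message introduces. */
	return um_request_irq(MYDEV_TX_IRQ, fd, IRQ_WRITE,
			      mydev_tx_interrupt, IRQF_SHARED,
			      "mydev-tx", dev);
}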
@@ -265,17 +385,28 @@ EXPORT_SYMBOL(deactivate_fd);
265 */ 385 */
266int deactivate_all_fds(void) 386int deactivate_all_fds(void)
267{ 387{
268 struct irq_fd *irq; 388 unsigned long flags;
269 int err; 389 struct irq_entry *to_free;
270 390
271 for (irq = active_fds; irq != NULL; irq = irq->next) { 391 spin_lock_irqsave(&irq_lock, flags);
272 err = os_clear_fd_async(irq->fd); 392 /* Stop IO. The IRQ loop has no lock so this is our
273 if (err) 393 * only way of making sure we are safe to dispose
274 return err; 394 * of all IRQ handlers
275 } 395 */
276 /* If there is a signal already queued, after unblocking ignore it */
277 os_set_ioignore(); 396 os_set_ioignore();
278 397 to_free = active_fds;
398 while (to_free != NULL) {
399 do_free_by_irq_and_dev(
400 to_free,
401 -1,
402 NULL,
403 IGNORE_IRQ | IGNORE_DEV
404 );
405 to_free = to_free->next;
406 }
407 garbage_collect_irq_entries();
408 spin_unlock_irqrestore(&irq_lock, flags);
409 os_close_epoll_fd();
279 return 0; 410 return 0;
280} 411}
281 412
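deactivate_all_fds() and the free paths above all funnel through assign_epoll_events_to_irq(), which ORs the per-type interests for an fd into a single epoll registration. Restated with the plain epoll(7) API for illustration (the names are invented; the kernel goes through its os_*_epoll_fd() wrappers rather than calling epoll_ctl() directly):

#include <errno.h>
#include <sys/epoll.h>

/* Keep exactly one epoll registration per fd: add it, modify it when the
 * merged event mask changes, delete it when no interest is left.
 * Illustrative only - not kernel code.
 */
static int sync_fd_registration(int epfd, int fd, void *entry,
				unsigned int read_mask, unsigned int write_mask)
{
	struct epoll_event ev = {
		.events = read_mask | write_mask,   /* merged per-type interests */
		.data.ptr = entry,                  /* back-reference, as in the sketch above */
	};

	if (ev.events == 0)                         /* no events - delete */
		return epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);

	/* Add first; if the fd is already registered, fall back to modify,
	 * mirroring "os_add_epoll will call os_mod_epoll if this already exists".
	 */
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == 0)
		return 0;
	if (errno == EEXIST)
		return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
	return -1;
}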
@@ -353,8 +484,11 @@ void __init init_IRQ(void)
353 484
354 irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); 485 irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
355 486
487
356 for (i = 1; i < NR_IRQS; i++) 488 for (i = 1; i < NR_IRQS; i++)
357 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); 489 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
490 /* Initialize EPOLL Loop */
491 os_setup_epoll();
358} 492}
359 493
360/* 494/*