author     Eric Wong <normalperson@yhbt.net>               2013-04-30 18:27:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-04-30 20:04:04 -0400
commit     eea1d585917c538d90bc26fda5d8e53796feada2 (patch)
tree       2a0d05e84a0b036b6174b27d6359f11e89f56b46 /fs/eventpoll.c
parent     39732ca5af4b09f4db561149041ddad7211019a5 (diff)
epoll: use RCU to protect wakeup_source in epitem
This prevents wakeup_source destruction when a user hits the item with
EPOLL_CTL_MOD while ep_poll_callback is running.
Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2"
Signed-off-by: Eric Wong <normalperson@yhbt.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
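The patch applies the standard RCU-protected-pointer pattern: ep_poll_callback, which cannot take ep->mtx, reads epi->ws under rcu_read_lock()/rcu_dereference(), while ep_destroy_wakeup_source() clears the pointer with rcu_assign_pointer() and waits with synchronize_rcu() before handing the old wakeup_source to wakeup_source_unregister(). Below is a minimal userspace sketch of that same pattern for readers unfamiliar with it, written against liburcu rather than the kernel RCU API. It is illustrative only: struct item, struct resource, item_use_resource() and item_clear_resource() are made-up names, not code from fs/eventpoll.c, and the build line assumes liburcu is installed (roughly: gcc -o rcu_sketch rcu_sketch.c -lurcu; header names vary slightly across liburcu versions).

/*
 * Minimal userspace sketch of the RCU pattern used by this patch.
 * Illustrative only: all names below are made up, not from fs/eventpoll.c.
 */
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>               /* rcu_read_lock(), synchronize_rcu(), ... */

struct resource {
	int active;
};

struct item {
	struct resource *res;   /* cleared by the writer, read under RCU */
};

/* reader side: analogous to ep_pm_stay_awake_rcu(), no mutex held */
static void item_use_resource(struct item *it)
{
	struct resource *r;

	rcu_read_lock();
	r = rcu_dereference(it->res);
	if (r)
		printf("resource still there, active=%d\n", r->active);
	else
		printf("resource gone\n");
	rcu_read_unlock();
}

/* writer side: analogous to ep_destroy_wakeup_source(), serialized externally */
static void item_clear_resource(struct item *it)
{
	struct resource *r = it->res;

	rcu_assign_pointer(it->res, NULL);
	synchronize_rcu();      /* wait out readers that may still hold r */
	free(r);                /* stand-in for wakeup_source_unregister() */
}

int main(void)
{
	struct item it;

	rcu_register_thread();  /* every RCU reader thread must register */

	it.res = calloc(1, sizeof(*it.res));
	if (!it.res)
		return 1;
	it.res->active = 1;

	item_use_resource(&it);         /* sees the resource */
	item_clear_resource(&it);       /* safe teardown */
	item_use_resource(&it);         /* sees NULL, never freed memory */

	rcu_unregister_thread();
	return 0;
}

The property mirrored from ep_destroy_wakeup_source() is that free() (standing in for wakeup_source_unregister()) runs only after synchronize_rcu() guarantees no reader can still be using the old pointer.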
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--  fs/eventpoll.c  92
1 file changed, 71 insertions(+), 21 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0e5eda068520..a3acf936c72a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -160,7 +160,7 @@ struct epitem {
 	struct list_head fllink;
 
 	/* wakeup_source used when EPOLLWAKEUP is set */
-	struct wakeup_source *ws;
+	struct wakeup_source __rcu *ws;
 
 	/* The structure that describe the interested events and the source fd */
 	struct epoll_event event;
@@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 	}
 }
 
+/* call only when ep->mtx is held */
+static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
+{
+	return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
+}
+
+/* call only when ep->mtx is held */
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+	struct wakeup_source *ws = ep_wakeup_source(epi);
+
+	if (ws)
+		__pm_stay_awake(ws);
+}
+
+static inline bool ep_has_wakeup_source(struct epitem *epi)
+{
+	return rcu_access_pointer(epi->ws) ? true : false;
+}
+
+/* call when ep->mtx cannot be held (ep_poll_callback) */
+static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
+{
+	struct wakeup_source *ws;
+
+	rcu_read_lock();
+	ws = rcu_dereference(epi->ws);
+	if (ws)
+		__pm_stay_awake(ws);
+	rcu_read_unlock();
+}
+
 /**
  * ep_scan_ready_list - Scans the ready list in a way that makes possible for
  *                      the scan code, to call f_op->poll(). Also allows for
@@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 		 */
 		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
-			__pm_stay_awake(epi->ws);
+			ep_pm_stay_awake(epi);
 		}
 	}
 	/*
@@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 		list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	wakeup_source_unregister(epi->ws);
+	wakeup_source_unregister(ep_wakeup_source(epi));
 
 	/* At this point it is safe to free the eventpoll item */
 	kmem_cache_free(epi_cache, epi);
@@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 			 * callback, but it's not actually ready, as far as
 			 * caller requested events goes. We can remove it here.
 			 */
-			__pm_relax(epi->ws);
+			__pm_relax(ep_wakeup_source(epi));
 			list_del_init(&epi->rdllink);
 		}
 	}
@@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	/* If this file is already in the ready list we exit soon */
 	if (!ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
-		__pm_stay_awake(epi->ws);
+		ep_pm_stay_awake_rcu(epi);
 	}
 
 	/*
@@ -1148,6 +1180,7 @@ static int reverse_path_check(void)
 static int ep_create_wakeup_source(struct epitem *epi)
 {
 	const char *name;
+	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
 		epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
 	}
 
 	name = epi->ffd.file->f_path.dentry->d_name.name;
-	epi->ws = wakeup_source_register(name);
-	if (!epi->ws)
+	ws = wakeup_source_register(name);
+
+	if (!ws)
 		return -ENOMEM;
+	rcu_assign_pointer(epi->ws, ws);
 
 	return 0;
 }
 
-static void ep_destroy_wakeup_source(struct epitem *epi)
+/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
+static noinline void ep_destroy_wakeup_source(struct epitem *epi)
 {
-	wakeup_source_unregister(epi->ws);
-	epi->ws = NULL;
+	struct wakeup_source *ws = ep_wakeup_source(epi);
+
+	rcu_assign_pointer(epi->ws, NULL);
+
+	/*
+	 * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
+	 * used internally by wakeup_source_remove, too (called by
+	 * wakeup_source_unregister), so we cannot use call_rcu
+	 */
+	synchronize_rcu();
+	wakeup_source_unregister(ws);
 }
 
 /*
@@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		if (error)
 			goto error_create_wakeup_source;
 	} else {
-		epi->ws = NULL;
+		RCU_INIT_POINTER(epi->ws, NULL);
 	}
 
 	/* Initialize the poll table using the queue callback */
@@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
-		__pm_stay_awake(epi->ws);
+		ep_pm_stay_awake(epi);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
@@ -1290,7 +1335,7 @@ error_unregister:
 		list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	wakeup_source_unregister(epi->ws);
+	wakeup_source_unregister(ep_wakeup_source(epi));
 
 error_create_wakeup_source:
 	kmem_cache_free(epi_cache, epi);
@@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	pt._key = event->events;
 	epi->event.data = event->data; /* protected by mtx */
 	if (epi->event.events & EPOLLWAKEUP) {
-		if (!epi->ws)
+		if (!ep_has_wakeup_source(epi))
 			ep_create_wakeup_source(epi);
-	} else if (epi->ws) {
+	} else if (ep_has_wakeup_source(epi)) {
 		ep_destroy_wakeup_source(epi);
 	}
 
@@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 		spin_lock_irq(&ep->lock);
 		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
-			__pm_stay_awake(epi->ws);
+			ep_pm_stay_awake(epi);
 
 			/* Notify waiting tasks that events are available */
 			if (waitqueue_active(&ep->wq))
@@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 	unsigned int revents;
 	struct epitem *epi;
 	struct epoll_event __user *uevent;
+	struct wakeup_source *ws;
 	poll_table pt;
 
 	init_poll_funcptr(&pt, NULL);
@@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 		 * instead, but then epi->ws would temporarily be out of sync
 		 * with ep_is_linked().
 		 */
-		if (epi->ws && epi->ws->active)
-			__pm_stay_awake(ep->ws);
-		__pm_relax(epi->ws);
+		ws = ep_wakeup_source(epi);
+		if (ws) {
+			if (ws->active)
+				__pm_stay_awake(ep->ws);
+			__pm_relax(ws);
+		}
+
 		list_del_init(&epi->rdllink);
 
 		pt._key = epi->event.events;
@@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 			if (__put_user(revents, &uevent->events) ||
 			    __put_user(epi->event.data, &uevent->data)) {
 				list_add(&epi->rdllink, head);
-				__pm_stay_awake(epi->ws);
+				ep_pm_stay_awake(epi);
 				return eventcnt ? eventcnt : -EFAULT;
 			}
 			eventcnt++;
@@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 			 * poll callback will queue them in ep->ovflist.
 			 */
 			list_add_tail(&epi->rdllink, &ep->rdllist);
-			__pm_stay_awake(epi->ws);
+			ep_pm_stay_awake(epi);
 		}
 	}
 }