author     Eric Wong <normalperson@yhbt.net>  2013-04-30 18:27:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-04-30 20:04:04 -0400
commit     eea1d585917c538d90bc26fda5d8e53796feada2
tree       2a0d05e84a0b036b6174b27d6359f11e89f56b46
parent     39732ca5af4b09f4db561149041ddad7211019a5
epoll: use RCU to protect wakeup_source in epitem
This prevents wakeup_source destruction when a user hits the item with
EPOLL_CTL_MOD while ep_poll_callback is running.

Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2"

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
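For illustration only, the sketch below mirrors the publish/read/retire pattern this patch applies to epi->ws: the lock-free path dereferences the pointer only inside rcu_read_lock()/rcu_read_unlock(), while the EPOLL_CTL_MOD path publishes NULL with rcu_assign_pointer() and waits in synchronize_rcu() before unregistering the wakeup source. It is written against the userspace RCU library (liburcu, assuming the classic "#include <urcu.h>" API linked with -lurcu) as a stand-in for the kernel primitives; the structure definitions, function names (stay_awake_rcu, destroy_ws) and build line are assumptions for the sketch, not code from this patch.

/*
 * Illustrative sketch only -- not part of the patch.  Toy userspace
 * model of the epi->ws lifetime rules, built on liburcu.
 * Assumed build line: gcc sketch.c -lurcu
 */
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct wakeup_source {
	int active;			/* toy stand-in for the kernel object */
};

struct epitem {
	struct wakeup_source *ws;	/* only written via rcu_assign_pointer() */
};

/* reader side: roughly what ep_pm_stay_awake_rcu() does without ep->mtx */
static void stay_awake_rcu(struct epitem *epi)
{
	struct wakeup_source *ws;

	rcu_read_lock();
	ws = rcu_dereference(epi->ws);	/* may observe NULL during teardown */
	if (ws)
		ws->active = 1;		/* stands in for __pm_stay_awake(ws) */
	rcu_read_unlock();
}

/* updater side: roughly what ep_destroy_wakeup_source() does under ep->mtx */
static void destroy_ws(struct epitem *epi)
{
	struct wakeup_source *ws = epi->ws;	/* updater is serialized, plain read */

	rcu_assign_pointer(epi->ws, NULL);
	synchronize_rcu();		/* wait for in-flight stay_awake_rcu() readers */
	free(ws);			/* stands in for wakeup_source_unregister(ws) */
}

int main(void)
{
	struct epitem epi = { .ws = calloc(1, sizeof(struct wakeup_source)) };

	rcu_register_thread();		/* liburcu readers must register their thread */
	stay_awake_rcu(&epi);		/* sees the live wakeup source */
	destroy_ws(&epi);		/* NULLs the pointer, waits, then frees */
	stay_awake_rcu(&epi);		/* sees NULL and does nothing */
	rcu_unregister_thread();
	printf("done\n");
	return 0;
}

The design point is the same as in the patch: readers never block inside the read-side critical section, so the updater can rely on synchronize_rcu() to guarantee no reader still holds the old wakeup_source before it is torn down.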
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--  fs/eventpoll.c | 92
1 file changed, 71 insertions, 21 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0e5eda068520..a3acf936c72a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -160,7 +160,7 @@ struct epitem {
 	struct list_head fllink;
 
 	/* wakeup_source used when EPOLLWAKEUP is set */
-	struct wakeup_source *ws;
+	struct wakeup_source __rcu *ws;
 
 	/* The structure that describe the interested events and the source fd */
 	struct epoll_event event;
@@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 	}
 }
 
+/* call only when ep->mtx is held */
+static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
+{
+	return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
+}
+
+/* call only when ep->mtx is held */
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+	struct wakeup_source *ws = ep_wakeup_source(epi);
+
+	if (ws)
+		__pm_stay_awake(ws);
+}
+
+static inline bool ep_has_wakeup_source(struct epitem *epi)
+{
+	return rcu_access_pointer(epi->ws) ? true : false;
+}
+
+/* call when ep->mtx cannot be held (ep_poll_callback) */
+static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
+{
+	struct wakeup_source *ws;
+
+	rcu_read_lock();
+	ws = rcu_dereference(epi->ws);
+	if (ws)
+		__pm_stay_awake(ws);
+	rcu_read_unlock();
+}
+
 /**
  * ep_scan_ready_list - Scans the ready list in a way that makes possible for
  *                      the scan code, to call f_op->poll(). Also allows for
@@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 		 */
 		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
-			__pm_stay_awake(epi->ws);
+			ep_pm_stay_awake(epi);
 		}
 	}
 	/*
@@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 		list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	wakeup_source_unregister(epi->ws);
+	wakeup_source_unregister(ep_wakeup_source(epi));
 
 	/* At this point it is safe to free the eventpoll item */
 	kmem_cache_free(epi_cache, epi);
@@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 			 * callback, but it's not actually ready, as far as
 			 * caller requested events goes. We can remove it here.
 			 */
-			__pm_relax(epi->ws);
+			__pm_relax(ep_wakeup_source(epi));
 			list_del_init(&epi->rdllink);
 		}
 	}
@@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	/* If this file is already in the ready list we exit soon */
 	if (!ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
-		__pm_stay_awake(epi->ws);
+		ep_pm_stay_awake_rcu(epi);
 	}
 
 	/*
@@ -1148,6 +1180,7 @@ static int reverse_path_check(void)
 static int ep_create_wakeup_source(struct epitem *epi)
 {
 	const char *name;
+	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
 		epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
 	}
 
 	name = epi->ffd.file->f_path.dentry->d_name.name;
-	epi->ws = wakeup_source_register(name);
-	if (!epi->ws)
+	ws = wakeup_source_register(name);
+
+	if (!ws)
 		return -ENOMEM;
+	rcu_assign_pointer(epi->ws, ws);
 
 	return 0;
 }
 
-static void ep_destroy_wakeup_source(struct epitem *epi)
+/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
+static noinline void ep_destroy_wakeup_source(struct epitem *epi)
 {
-	wakeup_source_unregister(epi->ws);
-	epi->ws = NULL;
+	struct wakeup_source *ws = ep_wakeup_source(epi);
+
+	rcu_assign_pointer(epi->ws, NULL);
+
+	/*
+	 * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
+	 * used internally by wakeup_source_remove, too (called by
+	 * wakeup_source_unregister), so we cannot use call_rcu
+	 */
+	synchronize_rcu();
+	wakeup_source_unregister(ws);
 }
 
 /*
@@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 		if (error)
 			goto error_create_wakeup_source;
 	} else {
-		epi->ws = NULL;
+		RCU_INIT_POINTER(epi->ws, NULL);
 	}
 
 	/* Initialize the poll table using the queue callback */
@@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
-		__pm_stay_awake(epi->ws);
+		ep_pm_stay_awake(epi);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
@@ -1290,7 +1335,7 @@ error_unregister:
 		list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	wakeup_source_unregister(epi->ws);
+	wakeup_source_unregister(ep_wakeup_source(epi));
 
 error_create_wakeup_source:
 	kmem_cache_free(epi_cache, epi);
@@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	pt._key = event->events;
 	epi->event.data = event->data; /* protected by mtx */
 	if (epi->event.events & EPOLLWAKEUP) {
-		if (!epi->ws)
+		if (!ep_has_wakeup_source(epi))
 			ep_create_wakeup_source(epi);
-	} else if (epi->ws) {
+	} else if (ep_has_wakeup_source(epi)) {
 		ep_destroy_wakeup_source(epi);
 	}
 
@@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 		spin_lock_irq(&ep->lock);
 		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
-			__pm_stay_awake(epi->ws);
+			ep_pm_stay_awake(epi);
 
 			/* Notify waiting tasks that events are available */
 			if (waitqueue_active(&ep->wq))
@@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 	unsigned int revents;
 	struct epitem *epi;
 	struct epoll_event __user *uevent;
+	struct wakeup_source *ws;
 	poll_table pt;
 
 	init_poll_funcptr(&pt, NULL);
@@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 		 * instead, but then epi->ws would temporarily be out of sync
 		 * with ep_is_linked().
 		 */
-		if (epi->ws && epi->ws->active)
-			__pm_stay_awake(ep->ws);
-		__pm_relax(epi->ws);
+		ws = ep_wakeup_source(epi);
+		if (ws) {
+			if (ws->active)
+				__pm_stay_awake(ep->ws);
+			__pm_relax(ws);
+		}
+
 		list_del_init(&epi->rdllink);
 
 		pt._key = epi->event.events;
@@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 			if (__put_user(revents, &uevent->events) ||
 			    __put_user(epi->event.data, &uevent->data)) {
 				list_add(&epi->rdllink, head);
-				__pm_stay_awake(epi->ws);
+				ep_pm_stay_awake(epi);
 				return eventcnt ? eventcnt : -EFAULT;
 			}
 			eventcnt++;
@@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 				 * poll callback will queue them in ep->ovflist.
 				 */
 				list_add_tail(&epi->rdllink, &ep->rdllist);
-				__pm_stay_awake(epi->ws);
+				ep_pm_stay_awake(epi);
 			}
 		}
 	}