author	Arve Hjønnevåg <arve@android.com>	2012-05-01 15:33:34 -0400
committer	Rafael J. Wysocki <rjw@sisk.pl>	2012-05-05 15:50:41 -0400
commit	4d7e30d98939a0340022ccd49325a3d70f7e0238 (patch)
tree	45307bd3a0b7fafc71aea41fc518f42e2e810e76 /fs/eventpoll.c
parent	b86ff9820fd5df69295273b9aa68e58786ffc23f (diff)
epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready
When an epoll_event that has the EPOLLWAKEUP flag set is ready, a
wakeup_source will be active to prevent suspend. This can be used to
handle wakeup events from a driver that supports poll, e.g. input, if
that driver wakes up the waitqueue passed to epoll before allowing
suspend.
Signed-off-by: Arve Hjønnevåg <arve@android.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
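
A minimal userspace sketch of how the new flag is meant to be used (illustrative, not part of this commit; it assumes a kernel with this patch, a caller holding CAP_EPOLLWAKEUP, and a hypothetical wakeup-capable descriptor "input_fd"; the EPOLLWAKEUP fallback define uses the value this series adds to include/linux/eventpoll.h):

#include <sys/epoll.h>
#include <unistd.h>

#ifndef EPOLLWAKEUP
#define EPOLLWAKEUP (1u << 29)	/* value from this patch series */
#endif

int watch_wakeup_fd(int input_fd)
{
	struct epoll_event ev, out;
	int n, epfd = epoll_create1(0);

	if (epfd < 0)
		return -1;

	/* Ask epoll to hold a wakeup_source while events on this fd are ready. */
	ev.events = EPOLLIN | EPOLLWAKEUP;
	ev.data.fd = input_fd;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, input_fd, &ev) < 0) {
		/* Fails (EPERM) if the caller lacks CAP_EPOLLWAKEUP. */
		close(epfd);
		return -1;
	}

	/*
	 * From ep_poll_callback() until the event is consumed here, the
	 * active wakeup_source prevents system suspend.
	 */
	n = epoll_wait(epfd, &out, 1, -1);
	close(epfd);
	return n;
}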
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--	fs/eventpoll.c | 90
1 file changed, 87 insertions(+), 3 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c0b3c70ee87a..2cf0f2153be5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -33,6 +33,7 @@
 #include <linux/bitops.h>
 #include <linux/mutex.h>
 #include <linux/anon_inodes.h>
+#include <linux/device.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/mman.h>
@@ -87,7 +88,7 @@
  */
 
 /* Epoll private bits inside the event mask */
-#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
+#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET)
 
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
@@ -154,6 +155,9 @@ struct epitem {
 	/* List header used to link this item to the "struct file" items list */
 	struct list_head fllink;
 
+	/* wakeup_source used when EPOLLWAKEUP is set */
+	struct wakeup_source *ws;
+
 	/* The structure that describe the interested events and the source fd */
 	struct epoll_event event;
 };
@@ -194,6 +198,9 @@ struct eventpoll {
 	 */
 	struct epitem *ovflist;
 
+	/* wakeup_source used when ep_scan_ready_list is running */
+	struct wakeup_source *ws;
+
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
 
@@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 		 * queued into ->ovflist but the "txlist" might already
 		 * contain them, and the list_splice() below takes care of them.
 		 */
-		if (!ep_is_linked(&epi->rdllink))
+		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
+			__pm_stay_awake(epi->ws);
+		}
 	}
 	/*
 	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
@@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * Quickly re-inject items left on "txlist".
 	 */
 	list_splice(&txlist, &ep->rdllist);
+	__pm_relax(ep->ws);
 
 	if (!list_empty(&ep->rdllist)) {
 		/*
@@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 	list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
+	wakeup_source_unregister(epi->ws);
+
 	/* At this point it is safe to free the eventpoll item */
 	kmem_cache_free(epi_cache, epi);
 
@@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep)
 	mutex_unlock(&epmutex);
 	mutex_destroy(&ep->mtx);
 	free_uid(ep->user);
+	wakeup_source_unregister(ep->ws);
 	kfree(ep);
 }
 
@@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 			 * callback, but it's not actually ready, as far as
 			 * caller requested events goes. We can remove it here.
 			 */
+			__pm_relax(epi->ws);
 			list_del_init(&epi->rdllink);
 		}
 	}
@@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		if (epi->next == EP_UNACTIVE_PTR) {
 			epi->next = ep->ovflist;
 			ep->ovflist = epi;
+			if (epi->ws) {
+				/*
+				 * Activate ep->ws since epi->ws may get
+				 * deactivated at any time.
+				 */
+				__pm_stay_awake(ep->ws);
+			}
+
 		}
 		goto out_unlock;
 	}
 
 	/* If this file is already in the ready list we exit soon */
-	if (!ep_is_linked(&epi->rdllink))
+	if (!ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
+		__pm_stay_awake(epi->ws);
+	}
 
 	/*
 	 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
@@ -1091,6 +1115,30 @@ static int reverse_path_check(void)
 	return error;
 }
 
+static int ep_create_wakeup_source(struct epitem *epi)
+{
+	const char *name;
+
+	if (!epi->ep->ws) {
+		epi->ep->ws = wakeup_source_register("eventpoll");
+		if (!epi->ep->ws)
+			return -ENOMEM;
+	}
+
+	name = epi->ffd.file->f_path.dentry->d_name.name;
+	epi->ws = wakeup_source_register(name);
+	if (!epi->ws)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ep_destroy_wakeup_source(struct epitem *epi)
+{
+	wakeup_source_unregister(epi->ws);
+	epi->ws = NULL;
+}
+
 /*
  * Must be called with "mtx" held.
  */
@@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	epi->event = *event;
 	epi->nwait = 0;
 	epi->next = EP_UNACTIVE_PTR;
+	if (epi->event.events & EPOLLWAKEUP) {
+		error = ep_create_wakeup_source(epi);
+		if (error)
+			goto error_create_wakeup_source;
+	} else {
+		epi->ws = NULL;
+	}
 
 	/* Initialize the poll table using the queue callback */
 	epq.epi = epi;
@@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
+		__pm_stay_awake(epi->ws);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
@@ -1204,6 +1260,9 @@ error_unregister:
 	list_del_init(&epi->rdllink);
 	spin_unlock_irqrestore(&ep->lock, flags);
 
+	wakeup_source_unregister(epi->ws);
+
+error_create_wakeup_source:
 	kmem_cache_free(epi_cache, epi);
 
 	return error;
@@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	epi->event.events = event->events;
 	pt._key = event->events;
 	epi->event.data = event->data; /* protected by mtx */
+	if (epi->event.events & EPOLLWAKEUP) {
+		if (!epi->ws)
+			ep_create_wakeup_source(epi);
+	} else if (epi->ws) {
+		ep_destroy_wakeup_source(epi);
+	}
 
 	/*
 	 * Get current event bits. We can safely use the file* here because
@@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	spin_lock_irq(&ep->lock);
 	if (!ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
+		__pm_stay_awake(epi->ws);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
@@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 	     !list_empty(head) && eventcnt < esed->maxevents;) {
 		epi = list_first_entry(head, struct epitem, rdllink);
 
+		/*
+		 * Activate ep->ws before deactivating epi->ws to prevent
+		 * triggering auto-suspend here (in case we reactive epi->ws
+		 * below).
+		 *
+		 * This could be rearranged to delay the deactivation of epi->ws
+		 * instead, but then epi->ws would temporarily be out of sync
+		 * with ep_is_linked().
+		 */
+		if (epi->ws && epi->ws->active)
+			__pm_stay_awake(ep->ws);
+		__pm_relax(epi->ws);
 		list_del_init(&epi->rdllink);
 
 		pt._key = epi->event.events;
@@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 		if (__put_user(revents, &uevent->events) ||
 		    __put_user(epi->event.data, &uevent->data)) {
 			list_add(&epi->rdllink, head);
+			__pm_stay_awake(epi->ws);
 			return eventcnt ? eventcnt : -EFAULT;
 		}
 		eventcnt++;
@@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 		 * poll callback will queue them in ep->ovflist.
 		 */
 				list_add_tail(&epi->rdllink, &ep->rdllist);
+				__pm_stay_awake(epi->ws);
 			}
 		}
 	}
@@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	if (!tfile->f_op || !tfile->f_op->poll)
 		goto error_tgt_fput;
 
+	/* Check if EPOLLWAKEUP is allowed */
+	if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+		goto error_tgt_fput;
+
 	/*
 	 * We have to check that the file structure underneath the file descriptor
 	 * the user passed to us _is_ an eventpoll file. And also we do not permit
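
Driver-side sketch of the handoff the commit message describes: the driver's own wakeup_source must still be active when it wakes the waitqueue it hands to epoll, because ep_poll_callback() runs synchronously from that wake_up() and activates epi->ws (or ep->ws) before the driver relaxes. Names here (my_dev, my_dev_irq) are hypothetical, not from this patch:

#include <linux/pm_wakeup.h>
#include <linux/wait.h>
#include <linux/interrupt.h>

struct my_dev {
	struct device *dev;		/* device_init_wakeup(dev, true) at probe */
	wait_queue_head_t poll_wq;	/* waitqueue handed to epoll via ->poll() */
	bool data_ready;
};

static irqreturn_t my_dev_irq(int irq, void *arg)
{
	struct my_dev *md = arg;

	/* Hold off suspend until the wakeup is handed over to epoll. */
	pm_stay_awake(md->dev);
	md->data_ready = true;

	/*
	 * For an EPOLLWAKEUP item, ep_poll_callback() runs inside this
	 * wake_up() and activates epi->ws while our source is still
	 * active, so there is no window for auto-suspend.
	 */
	wake_up_interruptible(&md->poll_wq);

	pm_relax(md->dev);
	return IRQ_HANDLED;
}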