Diffstat (limited to 'fs/eventpoll.c')
 fs/eventpoll.c | 111
 1 file changed, 108 insertions(+), 3 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
  * cleanup path and it is also acquired by eventpoll_release_file()
  * if a file has been pushed inside an epoll set and it is then
  * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
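
The behavior this comment describes is easiest to see from user space: once epoll fd A has been inserted into B, the reverse insert must fail. A minimal sketch, assuming a kernel with this patch applied (epoll_create1() is used only for brevity):

#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>

int main(void)
{
        int a = epoll_create1(0);
        int b = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN };

        /* First insert: watch epoll fd 'a' from epoll fd 'b'. Allowed. */
        ev.data.fd = a;
        if (epoll_ctl(b, EPOLL_CTL_ADD, a, &ev) != 0)
                perror("A into B");

        /* Reverse insert would close a cycle: expected to fail, errno == ELOOP. */
        ev.data.fd = b;
        if (epoll_ctl(a, EPOLL_CTL_ADD, b, &ev) != 0)
                perror("B into A");

        return 0;
}

Without the global epmutex, two such inserts running concurrently could each pass the loop check before either links the files, which is exactly the race the comment describes.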
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
 /* Used for safe wake up implementation */
 static struct nested_calls poll_safewake_ncalls;
 
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
         return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
 }
 
+static inline struct timespec ep_set_mstimeout(long ms)
+{
+        struct timespec now, ts = {
+                .tv_sec = ms / MSEC_PER_SEC,
+                .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
+        };
+
+        ktime_get_ts(&now);
+        return timespec_add_safe(now, ts);
+}
+
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
                    int maxevents, long timeout)
 {
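
ep_set_mstimeout() folds the old two-step "read the clock, add the relative timeout" sequence into one helper, and timespec_add_safe() makes a huge ms value saturate instead of overflowing tv_sec. A user-space analogue of the same arithmetic, with clock_gettime(CLOCK_MONOTONIC) standing in for ktime_get_ts() (the helper name and open-coded constants below are illustrative, not kernel API):

#include <time.h>

static struct timespec set_mstimeout(long ms)
{
        struct timespec now, ts;

        /* clock_gettime() plays the role of ktime_get_ts() here. */
        clock_gettime(CLOCK_MONOTONIC, &now);
        ts.tv_sec = now.tv_sec + ms / 1000;
        ts.tv_nsec = now.tv_nsec + (ms % 1000) * 1000000L;
        /* Normalize; the kernel's timespec_add_safe() also saturates on overflow. */
        if (ts.tv_nsec >= 1000000000L) {
                ts.tv_sec++;
                ts.tv_nsec -= 1000000000L;
        }
        return ts;
}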
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
         unsigned long flags;
         long slack;
         wait_queue_t wait;
-        struct timespec end_time;
         ktime_t expires, *to = NULL;
 
         if (timeout > 0) {
-                ktime_get_ts(&end_time);
-                timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+                struct timespec end_time = ep_set_mstimeout(timeout);
+
                 slack = select_estimate_accuracy(&end_time);
                 to = &expires;
                 *to = timespec_to_ktime(end_time);
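
For reference, the value being converted here is the relative millisecond timeout of epoll_wait(2): a positive value blocks until an event or expiry, zero polls without blocking, and a negative value blocks until an event arrives. A caller-side sketch (wait_events is an illustrative wrapper, not kernel or libc API):

#include <sys/epoll.h>

static int wait_events(int epfd, struct epoll_event *evs, int max, long ms)
{
        /*
         * epoll_wait() takes the relative millisecond timeout that
         * ep_poll() converts above: ms > 0 blocks until an event or
         * expiry, ms == 0 polls, ms < 0 blocks indefinitely.
         */
        return epoll_wait(epfd, evs, max, (int)ms);
}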
@@ -1188,6 +1208,62 @@ retry:
         return res;
 }
 
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ *                      API, to verify that adding an epoll file inside another
+ *                      epoll structure does not violate the constraints, in
+ *                      terms of closed loops, or too deep chains (which can
+ *                      result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file currently being checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ *          data structure pointer.
+ * @call_nests: Current depth of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside the current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+        int error = 0;
+        struct file *file = priv;
+        struct eventpoll *ep = file->private_data;
+        struct rb_node *rbp;
+        struct epitem *epi;
+
+        mutex_lock(&ep->mtx);
+        for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+                epi = rb_entry(rbp, struct epitem, rbn);
+                if (unlikely(is_file_epoll(epi->ffd.file))) {
+                        error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                                               ep_loop_check_proc, epi->ffd.file,
+                                               epi->ffd.file->private_data, current);
+                        if (error != 0)
+                                break;
+                }
+        }
+        mutex_unlock(&ep->mtx);
+
+        return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ *                 inside another epoll file (represented by @ep) does not create
+ *                 closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside the current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+        return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+                              ep_loop_check_proc, file, ep, current);
+}
+
 /*
  * Open an eventpoll file descriptor.
  */
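
The walk above is a depth-first traversal of the "epoll fd watches epoll fd" graph, cut off at EP_MAX_NESTS (4 in this file). A simplified standalone sketch of the idea, with all names illustrative; the real ep_call_nested() additionally rejects any cookie already present on the current call chain:

#define MAX_NESTS 4     /* stands in for EP_MAX_NESTS */

struct node {
        int nchildren;
        struct node **children; /* only the nested epoll "files" */
};

/*
 * Walk the graph below 'cur'; fail if the walk reaches 'top' (which
 * would close a cycle) or exceeds MAX_NESTS levels of nesting.
 */
static int loop_check(const struct node *top, const struct node *cur, int depth)
{
        int i;

        if (depth > MAX_NESTS || cur == top)
                return -1;
        for (i = 0; i < cur->nchildren; i++)
                if (loop_check(top, cur->children[i], depth + 1) != 0)
                        return -1;
        return 0;
}

In these terms, ep_loop_check(ep, file) corresponds to loop_check(ep_node, file_node, 0), and a nonzero result makes epoll_ctl() fail with -ELOOP.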
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                 struct epoll_event __user *, event)
 {
         int error;
+        int did_lock_epmutex = 0;
         struct file *file, *tfile;
         struct eventpoll *ep;
         struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
          */
         ep = file->private_data;
 
+        /*
+         * When we insert an epoll file descriptor inside another epoll file
+         * descriptor, there is the chance of creating closed loops, which are
+         * better handled here than in more critical paths.
+         *
+         * We hold epmutex across the loop check and the insert in this case, in
+         * order to prevent two separate inserts from racing and each doing the
+         * insert "at the same time" such that ep_loop_check passes on both
+         * before either one does the insert, thereby creating a cycle.
+         */
+        if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+                mutex_lock(&epmutex);
+                did_lock_epmutex = 1;
+                error = -ELOOP;
+                if (ep_loop_check(ep, tfile) != 0)
+                        goto error_tgt_fput;
+        }
+
+
         mutex_lock(&ep->mtx);
 
         /*
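
The serialization this comment argues for can be exercised from user space: two threads race to insert A into B and B into A. Because epmutex is held across both the loop check and the insert, one of the two EPOLL_CTL_ADD calls must observe the other's edge and fail with ELOOP. A sketch, assuming a patched kernel:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/epoll.h>

static int a, b;

static void *cross_insert(void *arg)
{
        /* arg != NULL: insert a into b; arg == NULL: insert b into a. */
        int src = arg ? a : b, dst = arg ? b : a;
        struct epoll_event ev = { .events = EPOLLIN };

        if (epoll_ctl(dst, EPOLL_CTL_ADD, src, &ev) != 0 && errno == ELOOP)
                printf("insert rejected: ELOOP\n");
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        a = epoll_create1(0);
        b = epoll_create1(0);
        pthread_create(&t1, NULL, cross_insert, (void *)1);
        pthread_create(&t2, NULL, cross_insert, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}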
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
+        if (unlikely(did_lock_epmutex))
+                mutex_unlock(&epmutex);
+
         fput(tfile);
 error_fput:
         fput(file);
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
                 EP_ITEM_COST;
         BUG_ON(max_user_watches < 0);
 
+        /*
+         * Initialize the structure used to perform epoll file descriptor
+         * inclusion loop checks.
+         */
+        ep_nested_calls_init(&poll_loop_ncalls);
+
         /* Initialize the structure used to perform safe poll wait head wake ups */
         ep_nested_calls_init(&poll_safewake_ncalls);
 