Diffstat (limited to 'fs/eventpoll.c')
 fs/eventpoll.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 3 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
  * cleanup path and it is also acquired by eventpoll_release_file()
  * if a file has been pushed inside an epoll set and it is then
  * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we can walk the epoll tree and ensure that
+ * this insertion does not create a cycle of epoll file descriptors,
+ * which could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
 /* Used for safe wake up implementation */
 static struct nested_calls poll_safewake_ncalls;
 
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
 	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
 }
 
+static inline struct timespec ep_set_mstimeout(long ms)
+{
+	struct timespec now, ts = {
+		.tv_sec = ms / MSEC_PER_SEC,
+		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
+	};
+
+	ktime_get_ts(&now);
+	return timespec_add_safe(now, ts);
+}
+
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 		   int maxevents, long timeout)
 {
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	unsigned long flags;
 	long slack;
 	wait_queue_t wait;
-	struct timespec end_time;
 	ktime_t expires, *to = NULL;
 
 	if (timeout > 0) {
-		ktime_get_ts(&end_time);
-		timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+		struct timespec end_time = ep_set_mstimeout(timeout);
+
 		slack = select_estimate_accuracy(&end_time);
 		to = &expires;
 		*to = timespec_to_ktime(end_time);
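[Editor's note] The removed code multiplied the user-supplied millisecond timeout into nanoseconds before adding it to the current time, which can overflow for very large timeouts; ep_set_mstimeout() instead splits the conversion into seconds and nanoseconds and relies on timespec_add_safe(), which saturates rather than wraps. A minimal userspace sketch of the same saturating-deadline idea (timespec_add_saturated and ms_to_abs_deadline are illustrative names, not kernel API):

#include <stdio.h>
#include <time.h>
#include <limits.h>

#define MSEC_PER_SEC	1000L
#define NSEC_PER_MSEC	1000000L

/* Saturating add, in the spirit of the kernel's timespec_add_safe(). */
static struct timespec timespec_add_saturated(struct timespec a, struct timespec b)
{
	struct timespec res;

	if (a.tv_sec > (time_t)LONG_MAX - b.tv_sec) {
		/* Would overflow: clamp to the far future, not the past. */
		res.tv_sec = (time_t)LONG_MAX;
		res.tv_nsec = 999999999L;
		return res;
	}
	res.tv_sec = a.tv_sec + b.tv_sec;
	res.tv_nsec = a.tv_nsec + b.tv_nsec;
	if (res.tv_nsec >= 1000000000L) {	/* carry the nanosecond overflow */
		res.tv_sec++;
		res.tv_nsec -= 1000000000L;
	}
	return res;
}

/* Userspace analogue of ep_set_mstimeout(): absolute deadline from ms. */
static struct timespec ms_to_abs_deadline(long ms)
{
	struct timespec now, ts = {
		.tv_sec = ms / MSEC_PER_SEC,
		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
	};

	clock_gettime(CLOCK_MONOTONIC, &now);
	return timespec_add_saturated(now, ts);
}

int main(void)
{
	struct timespec t = ms_to_abs_deadline(2500);

	printf("deadline: %ld.%09ld\n", (long)t.tv_sec, t.tv_nsec);

	/* A huge timeout clamps instead of wrapping negative. */
	t = ms_to_abs_deadline(LONG_MAX);
	printf("clamped:  %ld.%09ld\n", (long)t.tv_sec, t.tv_nsec);
	return 0;
}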
@@ -1188,6 +1208,62 @@ retry:
 	return res;
 }
 
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ *                      API, to verify that adding an epoll file inside another
+ *                      epoll structure does not violate the constraints, in
+ *                      terms of closed loops, or too deep chains (which can
+ *                      result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ *          data structure pointer.
+ * @call_nests: Current depth of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct eventpoll *ep = file->private_data;
+	struct rb_node *rbp;
+	struct epitem *epi;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		epi = rb_entry(rbp, struct epitem, rbn);
+		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					       ep_loop_check_proc, epi->ffd.file,
+					       epi->ffd.file->private_data, current);
+			if (error != 0)
+				break;
+		}
+	}
+	mutex_unlock(&ep->mtx);
+
+	return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ *                 inside another epoll file (represented by @ep) does not
+ *                 create closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ *          structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+			      ep_loop_check_proc, file, ep, current);
+}
+
 /*
  * Open an eventpoll file descriptor.
  */
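[Editor's note] Conceptually, ep_loop_check() is a depth-limited depth-first search over the graph of epoll files: ep_call_nested() supplies both the per-path visited list (via the cookies it records in poll_loop_ncalls) and the EP_MAX_NESTS depth cap. A self-contained sketch of that algorithm on a plain adjacency list (struct node and the function names are illustrative only, not kernel code):

#include <stddef.h>

#define MAX_NESTS 4	/* mirrors EP_MAX_NESTS */

struct node {
	struct node **children;		/* epoll files nested inside this one */
	size_t nchildren;
};

/*
 * Walk everything reachable from "cur". "path" plays the role of the
 * nested-calls cookie list: seeing a node that is already on the current
 * chain means the proposed insertion would close a loop.
 */
static int loop_check_proc(struct node *cur, struct node **path, int depth)
{
	int d;
	size_t i;

	if (depth > MAX_NESTS)
		return -1;			/* chain too deep */
	for (d = 0; d < depth; d++)
		if (path[d] == cur)
			return -1;		/* already on this chain: loop */
	path[depth] = cur;
	for (i = 0; i < cur->nchildren; i++)
		if (loop_check_proc(cur->children[i], path, depth + 1))
			return -1;
	return 0;
}

/* Would inserting "file" into "top" create a loop or an over-deep chain? */
static int loop_check(struct node *top, struct node *file)
{
	struct node *path[MAX_NESTS + 2];

	path[0] = top;				/* the top-of-chain cookie */
	return loop_check_proc(file, path, 1);
}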
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		struct epoll_event __user *, event)
 {
 	int error;
+	int did_lock_epmutex = 0;
 	struct file *file, *tfile;
 	struct eventpoll *ep;
 	struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 */
 	ep = file->private_data;
 
+	/*
+	 * When we insert an epoll file descriptor inside another epoll file
+	 * descriptor, there is the chance of creating closed loops, which are
+	 * better handled here than in more critical paths.
+	 *
+	 * We hold epmutex across the loop check and the insert in this case, in
+	 * order to prevent two separate inserts from racing and each doing the
+	 * insert "at the same time" such that ep_loop_check passes on both
+	 * before either one does the insert, thereby creating a cycle.
+	 */
+	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+		mutex_lock(&epmutex);
+		did_lock_epmutex = 1;
+		error = -ELOOP;
+		if (ep_loop_check(ep, tfile) != 0)
+			goto error_tgt_fput;
+	}
+
+
 	mutex_lock(&ep->mtx);
 
 	/*
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
+	if (unlikely(did_lock_epmutex))
+		mutex_unlock(&epmutex);
+
 	fput(tfile);
 error_fput:
 	fput(file);
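[Editor's note] The net effect for userspace is that an EPOLL_CTL_ADD which would close a loop now fails cleanly with -ELOOP instead of constructing a cycle. A quick demonstration program (error handling trimmed; assumes a kernel with this patch applied):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/epoll.h>

int main(void)
{
	int a = epoll_create1(0);
	int b = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	/* A watches B: fine. */
	ev.data.fd = b;
	if (epoll_ctl(a, EPOLL_CTL_ADD, b, &ev) != 0)
		perror("add b into a");

	/* B watches A: would close the loop A -> B -> A. */
	ev.data.fd = a;
	if (epoll_ctl(b, EPOLL_CTL_ADD, a, &ev) != 0)
		printf("add a into b failed: %s (expected ELOOP)\n",
		       strerror(errno));
	return 0;
}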
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
 		EP_ITEM_COST;
 	BUG_ON(max_user_watches < 0);
 
+	/*
+	 * Initialize the structure used to perform epoll file descriptor
+	 * inclusion loop checks.
+	 */
+	ep_nested_calls_init(&poll_loop_ncalls);
+
 	/* Initialize the structure used to perform safe poll wait head wake ups */
 	ep_nested_calls_init(&poll_safewake_ncalls);
 