about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--fs/eventpoll.c234
-rw-r--r--include/linux/eventpoll.h1
-rw-r--r--include/linux/fs.h1
3 files changed, 211 insertions, 25 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a39cf4cfb9d..6879d0c5cb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
197 197
198 /* The user that created the eventpoll descriptor */ 198 /* The user that created the eventpoll descriptor */
199 struct user_struct *user; 199 struct user_struct *user;
200
201 struct file *file;
202
203 /* used to optimize loop detection check */
204 int visited;
205 struct list_head visited_list_link;
200}; 206};
201 207
202/* Wait structure used by the poll hooks */ 208/* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
255/* Slab cache used to allocate "struct eppoll_entry" */ 261/* Slab cache used to allocate "struct eppoll_entry" */
256static struct kmem_cache *pwq_cache __read_mostly; 262static struct kmem_cache *pwq_cache __read_mostly;
257 263
264/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
265static LIST_HEAD(visited_list);
266
267/*
268 * List of files with newly added links, where we may need to limit the number
269 * of emanating paths. Protected by the epmutex.
270 */
271static LIST_HEAD(tfile_check_list);
272
258#ifdef CONFIG_SYSCTL 273#ifdef CONFIG_SYSCTL
259 274
260#include <linux/sysctl.h> 275#include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
276}; 291};
277#endif /* CONFIG_SYSCTL */ 292#endif /* CONFIG_SYSCTL */
278 293
294static const struct file_operations eventpoll_fops;
295
296static inline int is_file_epoll(struct file *f)
297{
298 return f->f_op == &eventpoll_fops;
299}
279 300
280/* Setup the structure that is used as key for the RB tree */ 301/* Setup the structure that is used as key for the RB tree */
281static inline void ep_set_ffd(struct epoll_filefd *ffd, 302static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -728,12 +749,6 @@ static const struct file_operations eventpoll_fops = {
728 .llseek = noop_llseek, 749 .llseek = noop_llseek,
729}; 750};
730 751
731/* Fast test to see if the file is an eventpoll file */
732static inline int is_file_epoll(struct file *f)
733{
734 return f->f_op == &eventpoll_fops;
735}
736
737/* 752/*
738 * This is called from eventpoll_release() to unlink files from the eventpoll 753 * This is called from eventpoll_release() to unlink files from the eventpoll
739 * interface. We need to have this facility to cleanup correctly files that are 754 * interface. We need to have this facility to cleanup correctly files that are
@@ -954,6 +969,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
954 rb_insert_color(&epi->rbn, &ep->rbr); 969 rb_insert_color(&epi->rbn, &ep->rbr);
955} 970}
956 971
972
973
#define PATH_ARR_SIZE 5
/*
 * Limits on the number of wakeup paths of a given length (1..5) allowed to
 * emanate from a single file of interest: 1000 paths of length 1, 500 of
 * length 2, and so on. Bounding these prevents massive uncontrolled wakeup
 * storms. The common case -- a single ep watching n file sources -- needs
 * only one path of length 1 per source, so these limits are generous. They
 * are enforced during EPOLL_CTL_ADD only (modify and delete cannot add
 * paths). Protected by the epmutex.
 */
static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
static int path_count[PATH_ARR_SIZE];

/* Account one more path at this nesting depth; -1 once over budget. */
static int path_count_inc(int nests)
{
	return (++path_count[nests] > path_limits[nests]) ? -1 : 0;
}

/* Reset all per-depth path counters before a fresh reverse-path walk. */
static void path_count_init(void)
{
	int i = PATH_ARR_SIZE;

	while (i--)
		path_count[i] = 0;
}
1003
1004static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
1005{
1006 int error = 0;
1007 struct file *file = priv;
1008 struct file *child_file;
1009 struct epitem *epi;
1010
1011 list_for_each_entry(epi, &file->f_ep_links, fllink) {
1012 child_file = epi->ep->file;
1013 if (is_file_epoll(child_file)) {
1014 if (list_empty(&child_file->f_ep_links)) {
1015 if (path_count_inc(call_nests)) {
1016 error = -1;
1017 break;
1018 }
1019 } else {
1020 error = ep_call_nested(&poll_loop_ncalls,
1021 EP_MAX_NESTS,
1022 reverse_path_check_proc,
1023 child_file, child_file,
1024 current);
1025 }
1026 if (error != 0)
1027 break;
1028 } else {
1029 printk(KERN_ERR "reverse_path_check_proc: "
1030 "file is not an ep!\n");
1031 }
1032 }
1033 return error;
1034}
1035
1036/**
1037 * reverse_path_check - The tfile_check_list is list of file *, which have
1038 * links that are proposed to be newly added. We need to
1039 * make sure that those added links don't add too many
1040 * paths such that we will spend all our time waking up
1041 * eventpoll objects.
1042 *
1043 * Returns: Returns zero if the proposed links don't create too many paths,
1044 * -1 otherwise.
1045 */
1046static int reverse_path_check(void)
1047{
1048 int length = 0;
1049 int error = 0;
1050 struct file *current_file;
1051
1052 /* let's call this for all tfiles */
1053 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
1054 length++;
1055 path_count_init();
1056 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1057 reverse_path_check_proc, current_file,
1058 current_file, current);
1059 if (error)
1060 break;
1061 }
1062 return error;
1063}
1064
957/* 1065/*
958 * Must be called with "mtx" held. 1066 * Must be called with "mtx" held.
959 */ 1067 */
@@ -1015,6 +1123,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1015 */ 1123 */
1016 ep_rbtree_insert(ep, epi); 1124 ep_rbtree_insert(ep, epi);
1017 1125
1126 /* now check if we've created too many backpaths */
1127 error = -EINVAL;
1128 if (reverse_path_check())
1129 goto error_remove_epi;
1130
1018 /* We have to drop the new item inside our item list to keep track of it */ 1131 /* We have to drop the new item inside our item list to keep track of it */
1019 spin_lock_irqsave(&ep->lock, flags); 1132 spin_lock_irqsave(&ep->lock, flags);
1020 1133
@@ -1039,6 +1152,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1039 1152
1040 return 0; 1153 return 0;
1041 1154
1155error_remove_epi:
1156 spin_lock(&tfile->f_lock);
1157 if (ep_is_linked(&epi->fllink))
1158 list_del_init(&epi->fllink);
1159 spin_unlock(&tfile->f_lock);
1160
1161 rb_erase(&epi->rbn, &ep->rbr);
1162
1042error_unregister: 1163error_unregister:
1043 ep_unregister_pollwait(ep, epi); 1164 ep_unregister_pollwait(ep, epi);
1044 1165
@@ -1303,18 +1424,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1303 int error = 0; 1424 int error = 0;
1304 struct file *file = priv; 1425 struct file *file = priv;
1305 struct eventpoll *ep = file->private_data; 1426 struct eventpoll *ep = file->private_data;
1427 struct eventpoll *ep_tovisit;
1306 struct rb_node *rbp; 1428 struct rb_node *rbp;
1307 struct epitem *epi; 1429 struct epitem *epi;
1308 1430
1309 mutex_lock_nested(&ep->mtx, call_nests + 1); 1431 mutex_lock_nested(&ep->mtx, call_nests + 1);
1432 ep->visited = 1;
1433 list_add(&ep->visited_list_link, &visited_list);
1310 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { 1434 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1311 epi = rb_entry(rbp, struct epitem, rbn); 1435 epi = rb_entry(rbp, struct epitem, rbn);
1312 if (unlikely(is_file_epoll(epi->ffd.file))) { 1436 if (unlikely(is_file_epoll(epi->ffd.file))) {
1437 ep_tovisit = epi->ffd.file->private_data;
1438 if (ep_tovisit->visited)
1439 continue;
1313 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1440 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1314 ep_loop_check_proc, epi->ffd.file, 1441 ep_loop_check_proc, epi->ffd.file,
1315 epi->ffd.file->private_data, current); 1442 ep_tovisit, current);
1316 if (error != 0) 1443 if (error != 0)
1317 break; 1444 break;
1445 } else {
1446 /*
1447 * If we've reached a file that is not associated with
1448 * an ep, then we need to check if the newly added
1449 * links are going to add too many wakeup paths. We do
1450 * this by adding it to the tfile_check_list, if it's
1451 * not already there, and calling reverse_path_check()
1452 * during ep_insert().
1453 */
1454 if (list_empty(&epi->ffd.file->f_tfile_llink))
1455 list_add(&epi->ffd.file->f_tfile_llink,
1456 &tfile_check_list);
1318 } 1457 }
1319 } 1458 }
1320 mutex_unlock(&ep->mtx); 1459 mutex_unlock(&ep->mtx);
@@ -1335,8 +1474,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1335 */ 1474 */
1336static int ep_loop_check(struct eventpoll *ep, struct file *file) 1475static int ep_loop_check(struct eventpoll *ep, struct file *file)
1337{ 1476{
1338 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1477 int ret;
1478 struct eventpoll *ep_cur, *ep_next;
1479
1480 ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1339 ep_loop_check_proc, file, ep, current); 1481 ep_loop_check_proc, file, ep, current);
1482 /* clear visited list */
1483 list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
1484 visited_list_link) {
1485 ep_cur->visited = 0;
1486 list_del(&ep_cur->visited_list_link);
1487 }
1488 return ret;
1489}
1490
1491static void clear_tfile_check_list(void)
1492{
1493 struct file *file;
1494
1495 /* first clear the tfile_check_list */
1496 while (!list_empty(&tfile_check_list)) {
1497 file = list_first_entry(&tfile_check_list, struct file,
1498 f_tfile_llink);
1499 list_del_init(&file->f_tfile_llink);
1500 }
1501 INIT_LIST_HEAD(&tfile_check_list);
1340} 1502}
1341 1503
1342/* 1504/*
@@ -1344,8 +1506,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
1344 */ 1506 */
1345SYSCALL_DEFINE1(epoll_create1, int, flags) 1507SYSCALL_DEFINE1(epoll_create1, int, flags)
1346{ 1508{
1347 int error; 1509 int error, fd;
1348 struct eventpoll *ep = NULL; 1510 struct eventpoll *ep = NULL;
1511 struct file *file;
1349 1512
1350 /* Check the EPOLL_* constant for consistency. */ 1513 /* Check the EPOLL_* constant for consistency. */
1351 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1514 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1362,11 +1525,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1362 * Creates all the items needed to setup an eventpoll file. That is, 1525 * Creates all the items needed to setup an eventpoll file. That is,
1363 * a file structure and a free file descriptor. 1526 * a file structure and a free file descriptor.
1364 */ 1527 */
1365 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1528 fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
1529 if (fd < 0) {
1530 error = fd;
1531 goto out_free_ep;
1532 }
1533 file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
1366 O_RDWR | (flags & O_CLOEXEC)); 1534 O_RDWR | (flags & O_CLOEXEC));
1367 if (error < 0) 1535 if (IS_ERR(file)) {
1368 ep_free(ep); 1536 error = PTR_ERR(file);
1369 1537 goto out_free_fd;
1538 }
1539 fd_install(fd, file);
1540 ep->file = file;
1541 return fd;
1542
1543out_free_fd:
1544 put_unused_fd(fd);
1545out_free_ep:
1546 ep_free(ep);
1370 return error; 1547 return error;
1371} 1548}
1372 1549
@@ -1432,21 +1609,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1432 /* 1609 /*
1433 * When we insert an epoll file descriptor, inside another epoll file 1610 * When we insert an epoll file descriptor, inside another epoll file
1434 * descriptor, there is the chance of creating closed loops, which are 1611 * descriptor, there is the chance of creating closed loops, which are
1435 * better be handled here, than in more critical paths. 1612 * better be handled here, than in more critical paths. While we are
1613 * checking for loops we also determine the list of files reachable
1614 * and hang them on the tfile_check_list, so we can check that we
1615 * haven't created too many possible wakeup paths.
1436 * 1616 *
1437 * We hold epmutex across the loop check and the insert in this case, in 1617 * We need to hold the epmutex across both ep_insert and ep_remove
1438 * order to prevent two separate inserts from racing and each doing the 1618 * b/c we want to make sure we are looking at a coherent view of
1439 * insert "at the same time" such that ep_loop_check passes on both 1619 * epoll network.
1440 * before either one does the insert, thereby creating a cycle.
1441 */ 1620 */
1442 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { 1621 if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
1443 mutex_lock(&epmutex); 1622 mutex_lock(&epmutex);
1444 did_lock_epmutex = 1; 1623 did_lock_epmutex = 1;
1445 error = -ELOOP;
1446 if (ep_loop_check(ep, tfile) != 0)
1447 goto error_tgt_fput;
1448 } 1624 }
1449 1625 if (op == EPOLL_CTL_ADD) {
1626 if (is_file_epoll(tfile)) {
1627 error = -ELOOP;
1628 if (ep_loop_check(ep, tfile) != 0)
1629 goto error_tgt_fput;
1630 } else
1631 list_add(&tfile->f_tfile_llink, &tfile_check_list);
1632 }
1450 1633
1451 mutex_lock_nested(&ep->mtx, 0); 1634 mutex_lock_nested(&ep->mtx, 0);
1452 1635
@@ -1465,6 +1648,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1465 error = ep_insert(ep, &epds, tfile, fd); 1648 error = ep_insert(ep, &epds, tfile, fd);
1466 } else 1649 } else
1467 error = -EEXIST; 1650 error = -EEXIST;
1651 clear_tfile_check_list();
1468 break; 1652 break;
1469 case EPOLL_CTL_DEL: 1653 case EPOLL_CTL_DEL:
1470 if (epi) 1654 if (epi)
@@ -1483,7 +1667,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1483 mutex_unlock(&ep->mtx); 1667 mutex_unlock(&ep->mtx);
1484 1668
1485error_tgt_fput: 1669error_tgt_fput:
1486 if (unlikely(did_lock_epmutex)) 1670 if (did_lock_epmutex)
1487 mutex_unlock(&epmutex); 1671 mutex_unlock(&epmutex);
1488 1672
1489 fput(tfile); 1673 fput(tfile);
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f362733186a..657ab55beda 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,7 @@ struct file;
61static inline void eventpoll_init_file(struct file *file) 61static inline void eventpoll_init_file(struct file *file)
62{ 62{
63 INIT_LIST_HEAD(&file->f_ep_links); 63 INIT_LIST_HEAD(&file->f_ep_links);
64 INIT_LIST_HEAD(&file->f_tfile_llink);
64} 65}
65 66
66 67
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7b17db7c5a6..d8ecb015ff8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -969,6 +969,7 @@ struct file {
969#ifdef CONFIG_EPOLL 969#ifdef CONFIG_EPOLL
970 /* Used by fs/eventpoll.c to link all the hooks to this file */ 970 /* Used by fs/eventpoll.c to link all the hooks to this file */
971 struct list_head f_ep_links; 971 struct list_head f_ep_links;
972 struct list_head f_tfile_llink;
972#endif /* #ifdef CONFIG_EPOLL */ 973#endif /* #ifdef CONFIG_EPOLL */
973 struct address_space *f_mapping; 974 struct address_space *f_mapping;
974#ifdef CONFIG_DEBUG_WRITECOUNT 975#ifdef CONFIG_DEBUG_WRITECOUNT