Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--  fs/eventpoll.c | 95
1 file changed, 69 insertions(+), 26 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 584249454822..f7fe7e3ce664 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -585,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
  * @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
                               int (*sproc)(struct eventpoll *,
                                            struct list_head *, void *),
-                              void *priv,
-                              int depth)
+                              void *priv, int depth, bool ep_locked)
 {
         int error, pwake = 0;
         unsigned long flags;
@@ -603,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
          * We need to lock this because we could be hit by
          * eventpoll_release_file() and epoll_ctl().
          */
-        mutex_lock_nested(&ep->mtx, depth);
+
+        if (!ep_locked)
+                mutex_lock_nested(&ep->mtx, depth);
 
         /*
          * Steal the ready list, and re-init the original one to the
@@ -667,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
         }
         spin_unlock_irqrestore(&ep->lock, flags);
 
-        mutex_unlock(&ep->mtx);
+        if (!ep_locked)
+                mutex_unlock(&ep->mtx);
 
         /* We have to call this outside the lock */
         if (pwake)
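The new ep_locked flag makes ep_scan_ready_list() safe to call from a context that already holds ep->mtx: the function now takes and drops the mutex only when the caller does not. Below is a minimal userspace sketch of the same conditional-locking pattern, assuming pthreads; the names (evset, scan_ready_list) are illustrative, not kernel APIs.

/* Sketch of the conditional-locking pattern the patch introduces: the
 * caller declares whether it already holds the mutex, so the callee
 * neither deadlocks nor unlocks a mutex it did not take.
 * Illustrative names only, not kernel APIs. Build with: gcc -pthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct evset {
        pthread_mutex_t mtx;
        int nready;
};

static int scan_ready_list(struct evset *es, bool already_locked)
{
        int n;

        if (!already_locked)
                pthread_mutex_lock(&es->mtx);

        n = es->nready;         /* walk the ready list under the lock */

        if (!already_locked)
                pthread_mutex_unlock(&es->mtx);
        return n;
}

int main(void)
{
        struct evset es = { PTHREAD_MUTEX_INITIALIZER, 3 };

        /* Outer path: not holding the lock, let the callee take it. */
        printf("unlocked caller sees %d\n", scan_ready_list(&es, false));

        /* Insert path: already holding es.mtx, tell the callee so. */
        pthread_mutex_lock(&es.mtx);
        printf("locked caller sees %d\n", scan_ready_list(&es, true));
        pthread_mutex_unlock(&es.mtx);
        return 0;
}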
@@ -822,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
         return 0;
 }
 
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+                                 poll_table *pt);
+
+struct readyevents_arg {
+        struct eventpoll *ep;
+        bool locked;
+};
+
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-        return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+        struct readyevents_arg *arg = priv;
+
+        return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+                                  call_nests + 1, arg->locked);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 {
         int pollflags;
         struct eventpoll *ep = file->private_data;
+        struct readyevents_arg arg;
+
+        /*
+         * During ep_insert() we already hold the ep->mtx for the tfile.
+         * Prevent re-acquisition.
+         */
+        arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+        arg.ep = ep;
 
         /* Insert inside our poll wait queue */
         poll_wait(file, &ep->poll_wait, wait);
@@ -842,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
          * could re-enter here.
          */
         pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-                                   ep_poll_readyevents_proc, ep, ep, current);
+                                   ep_poll_readyevents_proc, &arg, ep, current);
 
         return pollflags != -1 ? pollflags : 0;
 }
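From userspace, this code path is exercised by nesting epoll instances: polling an epoll fd from another epoll fd ends up in ep_eventpoll_poll() on the inner instance. A small Linux C program demonstrating that, with error handling trimmed to keep the sketch short:

/* Nested epoll: waiting on ep_outer polls the inner epoll instance. */
#include <sys/epoll.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
        int pipefd[2];
        int ep_inner, ep_outer;
        struct epoll_event ev = { .events = EPOLLIN }, out;

        pipe(pipefd);
        ep_inner = epoll_create1(0);
        ev.data.fd = pipefd[0];
        epoll_ctl(ep_inner, EPOLL_CTL_ADD, pipefd[0], &ev);

        ep_outer = epoll_create1(0);
        ev.data.fd = ep_inner;
        epoll_ctl(ep_outer, EPOLL_CTL_ADD, ep_inner, &ev);

        write(pipefd[1], "x", 1);       /* make the inner set ready */

        /* The outer wait reports the inner epoll fd as readable. */
        if (epoll_wait(ep_outer, &out, 1, 1000) == 1)
                printf("inner epoll fd %d is readable\n", out.data.fd);
        return 0;
}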
@@ -1243,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
  * Must be called with "mtx" held.
  */
 static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-                     struct file *tfile, int fd)
+                     struct file *tfile, int fd, int full_check)
 {
         int error, revents, pwake = 0;
         unsigned long flags;
@@ -1309,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
         /* now check if we've created too many backpaths */
         error = -EINVAL;
-        if (reverse_path_check())
+        if (full_check && reverse_path_check())
                 goto error_remove_epi;
 
         /* We have to drop the new item inside our item list to keep track of it */
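reverse_path_check() bounds the number of wakeup paths that can reach one file, and it now runs only when full_check is set, i.e. when the add could change the wakeup topology. The fan-out it polices comes from the fact that a single file may belong to several epoll sets at once; a short userspace sketch of that fan-out (error handling omitted):

/* One file in two epoll sets: two independent wakeup paths. */
#include <sys/epoll.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
        int pipefd[2];
        int ep1, ep2;
        struct epoll_event ev = { .events = EPOLLIN }, out;

        pipe(pipefd);
        ev.data.fd = pipefd[0];

        /* The same read end joins two epoll sets. */
        ep1 = epoll_create1(0);
        ep2 = epoll_create1(0);
        epoll_ctl(ep1, EPOLL_CTL_ADD, pipefd[0], &ev);
        epoll_ctl(ep2, EPOLL_CTL_ADD, pipefd[0], &ev);

        write(pipefd[1], "x", 1);

        /* Both sets independently report the fd ready. */
        printf("ep1: %d event(s)\n", epoll_wait(ep1, &out, 1, 0));
        printf("ep2: %d event(s)\n", epoll_wait(ep2, &out, 1, 0));
        return 0;
}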
@@ -1532,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
         esed.maxevents = maxevents;
         esed.events = events;
 
-        return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+        return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1802,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                 struct epoll_event __user *, event)
 {
         int error;
-        int did_lock_epmutex = 0;
+        int full_check = 0;
         struct fd f, tf;
         struct eventpoll *ep;
         struct epitem *epi;
         struct epoll_event epds;
+        struct eventpoll *tep = NULL;
 
         error = -EFAULT;
         if (ep_op_has_event(op) &&
@@ -1855,23 +1878,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
          * and hang them on the tfile_check_list, so we can check that we
          * haven't created too many possible wakeup paths.
          *
-         * We need to hold the epmutex across ep_insert to prevent
-         * multple adds from creating loops in parallel.
+         * We do not need to take the global 'epmutex' on EPOLL_CTL_ADD when
+         * the epoll file descriptor is attaching directly to a wakeup source,
+         * unless the epoll file descriptor is nested. The purpose of taking the
+         * 'epmutex' on add is to prevent complex topologies such as loops and
+         * deep wakeup paths from forming in parallel through multiple
+         * EPOLL_CTL_ADD operations.
          */
+        mutex_lock_nested(&ep->mtx, 0);
         if (op == EPOLL_CTL_ADD) {
-                mutex_lock(&epmutex);
-                did_lock_epmutex = 1;
-                if (is_file_epoll(tf.file)) {
-                        error = -ELOOP;
-                        if (ep_loop_check(ep, tf.file) != 0) {
-                                clear_tfile_check_list();
-                                goto error_tgt_fput;
+                if (!list_empty(&f.file->f_ep_links) ||
+                                                is_file_epoll(tf.file)) {
+                        full_check = 1;
+                        mutex_unlock(&ep->mtx);
+                        mutex_lock(&epmutex);
+                        if (is_file_epoll(tf.file)) {
+                                error = -ELOOP;
+                                if (ep_loop_check(ep, tf.file) != 0) {
+                                        clear_tfile_check_list();
+                                        goto error_tgt_fput;
+                                }
+                        } else
+                                list_add(&tf.file->f_tfile_llink,
+                                                        &tfile_check_list);
+                        mutex_lock_nested(&ep->mtx, 0);
+                        if (is_file_epoll(tf.file)) {
+                                tep = tf.file->private_data;
+                                mutex_lock_nested(&tep->mtx, 1);
                         }
-                } else
-                        list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+                }
+        }
+        if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
+                tep = tf.file->private_data;
+                mutex_lock_nested(&tep->mtx, 1);
         }
-
-        mutex_lock_nested(&ep->mtx, 0);
 
         /*
          * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
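The slow path above still takes the global epmutex so ep_loop_check() can walk the whole graph. The cycle it rejects is easy to provoke from userspace: once ep_a watches ep_b, adding ep_a into ep_b would close a loop, and EPOLL_CTL_ADD fails with ELOOP. A sketch, trimmed to the relevant check:

/* Provoking the loop that ep_loop_check() rejects. */
#include <sys/epoll.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
        int ep_a = epoll_create1(0);
        int ep_b = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN };

        ev.data.fd = ep_b;
        epoll_ctl(ep_a, EPOLL_CTL_ADD, ep_b, &ev);      /* a -> b: fine */

        ev.data.fd = ep_a;
        if (epoll_ctl(ep_b, EPOLL_CTL_ADD, ep_a, &ev) < 0 && errno == ELOOP)
                printf("b -> a rejected with ELOOP, as the loop check intends\n");
        return 0;
}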
@@ -1885,10 +1925,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
         case EPOLL_CTL_ADD:
                 if (!epi) {
                         epds.events |= POLLERR | POLLHUP;
-                        error = ep_insert(ep, &epds, tf.file, fd);
+                        error = ep_insert(ep, &epds, tf.file, fd, full_check);
                 } else
                         error = -EEXIST;
-                clear_tfile_check_list();
+                if (full_check)
+                        clear_tfile_check_list();
                 break;
         case EPOLL_CTL_DEL:
                 if (epi)
@@ -1904,10 +1945,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                 error = -ENOENT;
                 break;
         }
+        if (tep != NULL)
+                mutex_unlock(&tep->mtx);
         mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-        if (did_lock_epmutex)
+        if (full_check)
                 mutex_unlock(&epmutex);
 
         fdput(tf);
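The payoff is in the common case: EPOLL_CTL_ADD and EPOLL_CTL_DEL of an ordinary fd now serialize only on the per-instance ep->mtx, so threads driving distinct epoll instances no longer contend on the global epmutex. A sketch of such a workload, assuming pthreads; it illustrates the usage pattern rather than measuring anything:

/* Parallel plain-fd ADD/DEL on private epoll sets: post-patch, this
 * path never touches epmutex. Build with: gcc -pthread
 */
#include <sys/epoll.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>

#define NTHREADS 4
#define ITERS 100000

static void *worker(void *unused)
{
        int pipefd[2], epfd, i;
        struct epoll_event ev = { .events = EPOLLIN };

        (void)unused;
        pipe(pipefd);
        epfd = epoll_create1(0);
        ev.data.fd = pipefd[0];

        for (i = 0; i < ITERS; i++) {
                epoll_ctl(epfd, EPOLL_CTL_ADD, pipefd[0], &ev);
                epoll_ctl(epfd, EPOLL_CTL_DEL, pipefd[0], NULL);
        }
        close(epfd);
        close(pipefd[0]);
        close(pipefd[1]);
        return NULL;
}

int main(void)
{
        pthread_t tid[NTHREADS];
        int t;

        for (t = 0; t < NTHREADS; t++)
                pthread_create(&tid[t], NULL, worker, NULL);
        for (t = 0; t < NTHREADS; t++)
                pthread_join(tid[t], NULL);
        puts("done: parallel ADD/DEL on distinct epoll instances");
        return 0;
}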