Diffstat (limited to 'fs/eventpoll.c')

-rw-r--r--	fs/eventpoll.c	95
1 file changed, 69 insertions(+), 26 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 584249454822..f7fe7e3ce664 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -585,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
  * @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
 			      int (*sproc)(struct eventpoll *,
 					   struct list_head *, void *),
-			      void *priv,
-			      int depth)
+			      void *priv, int depth, bool ep_locked)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -603,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock_nested(&ep->mtx, depth);
+
+	if (!ep_locked)
+		mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -667,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	}
 	spin_unlock_irqrestore(&ep->lock, flags);
 
-	mutex_unlock(&ep->mtx);
+	if (!ep_locked)
+		mutex_unlock(&ep->mtx);
 
 	/* We have to call this outside the lock */
 	if (pwake)
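
The three hunks above thread a new 'ep_locked' flag through ep_scan_ready_list(): when the caller already holds ep->mtx, the function must not take it again. Below is a minimal userspace sketch of that caller-holds-the-lock convention using POSIX threads; every name in it is an illustrative stand-in, none of it is kernel code.

/*
 * Minimal sketch of the ep_locked convention, in userspace pthreads.
 * scan_ready_list() mirrors ep_scan_ready_list(): it takes the mutex
 * itself unless the caller says it already holds it.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;

static void scan_ready_list(bool already_locked)
{
	if (!already_locked)
		pthread_mutex_lock(&mtx);

	puts("scanning ready list under mtx");	/* work done under mtx */

	if (!already_locked)
		pthread_mutex_unlock(&mtx);
}

int main(void)
{
	/* Caller without the lock: the callee acquires and releases it. */
	scan_ready_list(false);

	/* Caller that already holds mtx (as ep_insert() holds the target
	 * ep->mtx when poll recurses): tell the callee to skip locking. */
	pthread_mutex_lock(&mtx);
	scan_ready_list(true);
	pthread_mutex_unlock(&mtx);
	return 0;
}
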
@@ -822,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 	return 0;
 }
 
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+				 poll_table *pt);
+
+struct readyevents_arg {
+	struct eventpoll *ep;
+	bool locked;
+};
+
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+	struct readyevents_arg *arg = priv;
+
+	return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+				  call_nests + 1, arg->locked);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 {
 	int pollflags;
 	struct eventpoll *ep = file->private_data;
+	struct readyevents_arg arg;
+
+	/*
+	 * During ep_insert() we already hold the ep->mtx for the tfile.
+	 * Prevent re-acquisition.
+	 */
+	arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+	arg.ep = ep;
 
 	/* Insert inside our poll wait queue */
 	poll_wait(file, &ep->poll_wait, wait);
@@ -842,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	 * could re-enter here.
 	 */
 	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
-				   ep_poll_readyevents_proc, ep, ep, current);
+				   ep_poll_readyevents_proc, &arg, ep, current);
 
 	return pollflags != -1 ? pollflags : 0;
 }
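
ep_poll_readyevents_proc() receives only a single void *priv from ep_call_nested(), so the patch bundles the eventpoll pointer and the locked flag into a stack-allocated struct readyevents_arg. A small standalone sketch of that bundling pattern follows; the names are illustrative stand-ins for the kernel types, not part of the patch.

/*
 * Sketch of the readyevents_arg idea: a callback that only takes a
 * single void *priv gets several values bundled into a stack struct.
 */
#include <stdbool.h>
#include <stdio.h>

struct scan_arg {
	int id;       /* stands in for struct eventpoll *ep */
	bool locked;  /* stands in for readyevents_arg.locked */
};

static int ready_proc(void *priv, int call_nests)
{
	struct scan_arg *arg = priv;	/* unpack on the callee side */

	printf("id=%d locked=%d nests=%d\n", arg->id, arg->locked, call_nests);
	return 0;
}

int main(void)
{
	/* Pack both values and pass one pointer through the generic
	 * callback interface, as ep_eventpoll_poll() now does with
	 * ep_call_nested(). */
	struct scan_arg arg = { .id = 3, .locked = true };

	return ready_proc(&arg, 0);
}
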
@@ -1243,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
  * Must be called with "mtx" held.
  */
 static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
-		     struct file *tfile, int fd)
+		     struct file *tfile, int fd, int full_check)
 {
 	int error, revents, pwake = 0;
 	unsigned long flags;
@@ -1309,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* now check if we've created too many backpaths */
 	error = -EINVAL;
-	if (reverse_path_check())
+	if (full_check && reverse_path_check())
 		goto error_remove_epi;
 
 	/* We have to drop the new item inside our item list to keep track of it */
@@ -1532,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
 	esed.maxevents = maxevents;
 	esed.events = events;
 
-	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
 }
 
 static inline struct timespec ep_set_mstimeout(long ms)
@@ -1802,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		struct epoll_event __user *, event)
 {
 	int error;
-	int did_lock_epmutex = 0;
+	int full_check = 0;
 	struct fd f, tf;
 	struct eventpoll *ep;
 	struct epitem *epi;
 	struct epoll_event epds;
+	struct eventpoll *tep = NULL;
 
 	error = -EFAULT;
 	if (ep_op_has_event(op) &&
@@ -1855,23 +1878,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * and hang them on the tfile_check_list, so we can check that we
 	 * haven't created too many possible wakeup paths.
 	 *
-	 * We need to hold the epmutex across ep_insert to prevent
-	 * multple adds from creating loops in parallel.
+	 * We do not need to take the global 'epmutex' on EPOLL_CTL_ADD when
+	 * the epoll file descriptor is attaching directly to a wakeup source,
+	 * unless the epoll file descriptor is nested. The purpose of taking the
+	 * 'epmutex' on add is to prevent complex topologies such as loops and
+	 * deep wakeup paths from forming in parallel through multiple
+	 * EPOLL_CTL_ADD operations.
 	 */
+	mutex_lock_nested(&ep->mtx, 0);
 	if (op == EPOLL_CTL_ADD) {
-		mutex_lock(&epmutex);
-		did_lock_epmutex = 1;
-		if (is_file_epoll(tf.file)) {
-			error = -ELOOP;
-			if (ep_loop_check(ep, tf.file) != 0) {
-				clear_tfile_check_list();
-				goto error_tgt_fput;
+		if (!list_empty(&f.file->f_ep_links) ||
+						is_file_epoll(tf.file)) {
+			full_check = 1;
+			mutex_unlock(&ep->mtx);
+			mutex_lock(&epmutex);
+			if (is_file_epoll(tf.file)) {
+				error = -ELOOP;
+				if (ep_loop_check(ep, tf.file) != 0) {
+					clear_tfile_check_list();
+					goto error_tgt_fput;
+				}
+			} else
+				list_add(&tf.file->f_tfile_llink,
+							&tfile_check_list);
+			mutex_lock_nested(&ep->mtx, 0);
+			if (is_file_epoll(tf.file)) {
+				tep = tf.file->private_data;
+				mutex_lock_nested(&tep->mtx, 1);
 			}
-		} else
-			list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+		}
+	}
+	if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
+		tep = tf.file->private_data;
+		mutex_lock_nested(&tep->mtx, 1);
 	}
-
-	mutex_lock_nested(&ep->mtx, 0);
 
 	/*
 	 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
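
From userspace, the two EPOLL_CTL_ADD paths distinguished above correspond to adding a plain wakeup source (the simple topology that now needs only ep->mtx) versus adding one epoll fd inside another (the nested case that still takes the global epmutex so ep_loop_check() can run). A short example exercising both paths; error handling is trimmed for brevity.

/*
 * Userspace view of the two EPOLL_CTL_ADD cases: a pipe read end is a
 * simple topology, while nesting one epoll fd in another takes the
 * full_check slow path.
 */
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	int pipefd[2];
	struct epoll_event ev = { .events = EPOLLIN };
	int inner, outer;

	if (pipe(pipefd) < 0)
		return 1;
	inner = epoll_create1(0);
	outer = epoll_create1(0);

	/* Simple topology: the target is not an epoll file and 'inner'
	 * is not itself watched, so only inner's ep->mtx is taken. */
	ev.data.fd = pipefd[0];
	epoll_ctl(inner, EPOLL_CTL_ADD, pipefd[0], &ev);

	/* Nested topology: the target is an epoll file, so the kernel
	 * takes epmutex and runs ep_loop_check()/reverse_path_check(). */
	ev.data.fd = inner;
	if (epoll_ctl(outer, EPOLL_CTL_ADD, inner, &ev) < 0)
		perror("nested EPOLL_CTL_ADD");

	close(outer);
	close(inner);
	close(pipefd[0]);
	close(pipefd[1]);
	return 0;
}
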
@@ -1885,10 +1925,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	case EPOLL_CTL_ADD:
 		if (!epi) {
 			epds.events |= POLLERR | POLLHUP;
-			error = ep_insert(ep, &epds, tf.file, fd);
+			error = ep_insert(ep, &epds, tf.file, fd, full_check);
 		} else
 			error = -EEXIST;
-		clear_tfile_check_list();
+		if (full_check)
+			clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1904,10 +1945,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		error = -ENOENT;
 		break;
 	}
+	if (tep != NULL)
+		mutex_unlock(&tep->mtx);
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (did_lock_epmutex)
+	if (full_check)
 		mutex_unlock(&epmutex);
 
 	fdput(tf);
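
The ep_loop_check() call retained under full_check is what keeps cycles out of the wakeup graph: once epoll fd 'b' is registered in 'a', an attempt to register 'a' in 'b' must fail. A sketch demonstrating the expected ELOOP from userspace (the loop check itself predates this patch; only its locking changes here):

/*
 * Demonstrates the cycle that ep_loop_check() rejects: once 'b' is
 * watched by 'a', adding 'a' into 'b' fails with ELOOP.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
	int a = epoll_create1(0);
	int b = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	ev.data.fd = b;
	if (epoll_ctl(a, EPOLL_CTL_ADD, b, &ev) < 0)
		perror("add b into a");

	ev.data.fd = a;
	if (epoll_ctl(b, EPOLL_CTL_ADD, a, &ev) < 0 && errno == ELOOP)
		puts("cycle rejected with ELOOP, as expected");

	close(a);
	close(b);
	return 0;
}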