diff options
-rw-r--r-- | fs/eventpoll.c | 48 |
1 files changed, 27 insertions, 21 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4c16127c96be..d4255be29bc6 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -134,7 +134,7 @@ struct poll_safewake { | |||
134 | * have an entry of this type linked to the "rbr" RB tree. | 134 | * have an entry of this type linked to the "rbr" RB tree. |
135 | */ | 135 | */ |
136 | struct epitem { | 136 | struct epitem { |
137 | /* RB-Tree node used to link this structure to the eventpoll rb-tree */ | 137 | /* RB tree node used to link this structure to the eventpoll RB tree */ |
138 | struct rb_node rbn; | 138 | struct rb_node rbn; |
139 | 139 | ||
140 | /* List header used to link this structure to the eventpoll ready list */ | 140 | /* List header used to link this structure to the eventpoll ready list */ |
@@ -191,7 +191,7 @@ struct eventpoll { | |||
191 | /* List of ready file descriptors */ | 191 | /* List of ready file descriptors */ |
192 | struct list_head rdllist; | 192 | struct list_head rdllist; |
193 | 193 | ||
194 | /* RB-Tree root used to store monitored fd structs */ | 194 | /* RB tree root used to store monitored fd structs */ |
195 | struct rb_root rbr; | 195 | struct rb_root rbr; |
196 | 196 | ||
197 | /* | 197 | /* |
@@ -241,7 +241,7 @@ static struct kmem_cache *epi_cache __read_mostly; | |||
241 | static struct kmem_cache *pwq_cache __read_mostly; | 241 | static struct kmem_cache *pwq_cache __read_mostly; |
242 | 242 | ||
243 | 243 | ||
244 | /* Setup the structure that is used as key for the rb-tree */ | 244 | /* Setup the structure that is used as key for the RB tree */ |
245 | static inline void ep_set_ffd(struct epoll_filefd *ffd, | 245 | static inline void ep_set_ffd(struct epoll_filefd *ffd, |
246 | struct file *file, int fd) | 246 | struct file *file, int fd) |
247 | { | 247 | { |
@@ -249,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd, | |||
249 | ffd->fd = fd; | 249 | ffd->fd = fd; |
250 | } | 250 | } |
251 | 251 | ||
252 | /* Compare rb-tree keys */ | 252 | /* Compare RB tree keys */ |
253 | static inline int ep_cmp_ffd(struct epoll_filefd *p1, | 253 | static inline int ep_cmp_ffd(struct epoll_filefd *p1, |
254 | struct epoll_filefd *p2) | 254 | struct epoll_filefd *p2) |
255 | { | 255 | { |
@@ -257,20 +257,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, | |||
257 | (p1->file < p2->file ? -1 : p1->fd - p2->fd)); | 257 | (p1->file < p2->file ? -1 : p1->fd - p2->fd)); |
258 | } | 258 | } |
259 | 259 | ||
260 | /* Special initialization for the rb-tree node to detect linkage */ | 260 | /* Special initialization for the RB tree node to detect linkage */ |
261 | static inline void ep_rb_initnode(struct rb_node *n) | 261 | static inline void ep_rb_initnode(struct rb_node *n) |
262 | { | 262 | { |
263 | rb_set_parent(n, n); | 263 | rb_set_parent(n, n); |
264 | } | 264 | } |
265 | 265 | ||
266 | /* Removes a node from the rb-tree and marks it for a fast is-linked check */ | 266 | /* Removes a node from the RB tree and marks it for a fast is-linked check */ |
267 | static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) | 267 | static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) |
268 | { | 268 | { |
269 | rb_erase(n, r); | 269 | rb_erase(n, r); |
270 | rb_set_parent(n, n); | 270 | rb_set_parent(n, n); |
271 | } | 271 | } |
272 | 272 | ||
273 | /* Fast check to verify that the item is linked to the main rb-tree */ | 273 | /* Fast check to verify that the item is linked to the main RB tree */ |
274 | static inline int ep_rb_linked(struct rb_node *n) | 274 | static inline int ep_rb_linked(struct rb_node *n) |
275 | { | 275 | { |
276 | return rb_parent(n) != n; | 276 | return rb_parent(n) != n; |
@@ -531,6 +531,8 @@ void eventpoll_release_file(struct file *file) | |||
531 | * We don't want to get "file->f_ep_lock" because it is not | 531 | * We don't want to get "file->f_ep_lock" because it is not |
532 | * necessary. It is not necessary because we're in the "struct file" | 532 | * necessary. It is not necessary because we're in the "struct file" |
533 | * cleanup path, and this means that no one is using this file anymore. | 533 | * cleanup path, and this means that no one is using this file anymore. |
534 | * So, for example, epoll_ctl() cannot hit here since if we reach this | ||
535 | * point, the file counter already went to zero and fget() would fail. | ||
534 | * The only hit might come from ep_free() but by holding the mutex | 536 | * The only hit might come from ep_free() but by holding the mutex |
535 | * will correctly serialize the operation. We do need to acquire | 537 | * will correctly serialize the operation. We do need to acquire |
536 | * "ep->mtx" after "epmutex" because ep_remove() requires it when called | 538 | * "ep->mtx" after "epmutex" because ep_remove() requires it when called |
@@ -802,7 +804,9 @@ error_unregister: | |||
802 | 804 | ||
803 | /* | 805 | /* |
804 | * We need to do this because an event could have arrived on some | 806 | * We need to do this because an event could have arrived on some |
805 | * allocated wait queue. | 807 | * allocated wait queue. Note that we don't care about the ep->ovflist |
808 | * list, since that is used/cleaned only inside a section bound by "mtx". | ||
809 | * And ep_insert() is called with "mtx" held. | ||
806 | */ | 810 | */ |
807 | spin_lock_irqsave(&ep->lock, flags); | 811 | spin_lock_irqsave(&ep->lock, flags); |
808 | if (ep_is_linked(&epi->rdllink)) | 812 | if (ep_is_linked(&epi->rdllink)) |
@@ -845,8 +849,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even | |||
845 | 849 | ||
846 | /* | 850 | /* |
847 | * If the item is "hot" and it is not registered inside the ready | 851 | * If the item is "hot" and it is not registered inside the ready |
848 | * list, push it inside. If the item is not "hot" and it is currently | 852 | * list, push it inside. |
849 | * registered inside the ready list, unlink it. | ||
850 | */ | 853 | */ |
851 | if (revents & event->events) { | 854 | if (revents & event->events) { |
852 | if (!ep_is_linked(&epi->rdllink)) { | 855 | if (!ep_is_linked(&epi->rdllink)) { |
@@ -966,15 +969,16 @@ errxit: | |||
966 | ep->ovflist = EP_UNACTIVE_PTR; | 969 | ep->ovflist = EP_UNACTIVE_PTR; |
967 | 970 | ||
968 | /* | 971 | /* |
969 | * In case of error in the event-send loop, we might still have items | 972 | * In case of error in the event-send loop, or in case the number of |
970 | * inside the "txlist". We need to splice them back inside ep->rdllist. | 973 | * ready events exceeds the userspace limit, we need to splice the |
974 | * "txlist" back inside ep->rdllist. | ||
971 | */ | 975 | */ |
972 | list_splice(&txlist, &ep->rdllist); | 976 | list_splice(&txlist, &ep->rdllist); |
973 | 977 | ||
974 | if (!list_empty(&ep->rdllist)) { | 978 | if (!list_empty(&ep->rdllist)) { |
975 | /* | 979 | /* |
976 | * Wake up (if active) both the eventpoll wait list and the ->poll() | 980 | * Wake up (if active) both the eventpoll wait list and the ->poll() |
977 | * wait list. | 981 | * wait list (delayed after we release the lock). |
978 | */ | 982 | */ |
979 | if (waitqueue_active(&ep->wq)) | 983 | if (waitqueue_active(&ep->wq)) |
980 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | | 984 | __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | |
@@ -1064,11 +1068,10 @@ retry: | |||
1064 | } | 1068 | } |
1065 | 1069 | ||
1066 | /* | 1070 | /* |
1067 | * It opens an eventpoll file descriptor by suggesting a storage of "size" | 1071 | * It opens an eventpoll file descriptor. The "size" parameter is there |
1068 | * file descriptors. The size parameter is just a hint about how to size | 1072 | * for historical reasons, when epoll was using a hash instead of an |
1069 | * data structures. It won't prevent the user to store more than "size" | 1073 | * RB tree. With the current implementation, the "size" parameter is ignored |
1070 | * file descriptors inside the epoll interface. It is the kernel part of | 1074 | * (besides sanity checks). |
1071 | * the userspace epoll_create(2). | ||
1072 | */ | 1075 | */ |
1073 | asmlinkage long sys_epoll_create(int size) | 1076 | asmlinkage long sys_epoll_create(int size) |
1074 | { | 1077 | { |
@@ -1114,8 +1117,7 @@ error_return: | |||
1114 | /* | 1117 | /* |
1115 | * The following function implements the controller interface for | 1118 | * The following function implements the controller interface for |
1116 | * the eventpoll file that enables the insertion/removal/change of | 1119 | * the eventpoll file that enables the insertion/removal/change of |
1117 | * file descriptors inside the interest set. It represents | 1120 | * file descriptors inside the interest set. |
1118 | * the kernel part of the user space epoll_ctl(2). | ||
1119 | */ | 1121 | */ |
1120 | asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | 1122 | asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, |
1121 | struct epoll_event __user *event) | 1123 | struct epoll_event __user *event) |
@@ -1167,7 +1169,11 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, | |||
1167 | 1169 | ||
1168 | mutex_lock(&ep->mtx); | 1170 | mutex_lock(&ep->mtx); |
1169 | 1171 | ||
1170 | /* Try to lookup the file inside our RB tree */ | 1172 | /* |
1173 | * Try to lookup the file inside our RB tree. Since we grabbed "mtx" | ||
1174 | * above, we can be sure to be able to use the item looked up by | ||
1175 | * ep_find() till we release the mutex. | ||
1176 | */ | ||
1171 | epi = ep_find(ep, tfile, fd); | 1177 | epi = ep_find(ep, tfile, fd); |
1172 | 1178 | ||
1173 | error = -EINVAL; | 1179 | error = -EINVAL; |