aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Davide Libenzi <davidel@xmailserver.org> 2006-06-25 08:48:14 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-06-25 13:01:13 -0400
commit: 3419b23a919698f75944d3e0d97eb1d9c51e4bb6 (patch)
tree: e1b4b6aad754c6a40137c0a563d823074501da2d /fs
parent: 4ad3bcf3146aa12f41262bb5dd1d9f1778e085b1 (diff)
[PATCH] epoll: use unlocked wqueue operations
A few days ago Arjan signaled a lockdep red flag on epoll locks, precisely between the epoll's device structure lock (->lock) and the wait queue head lock (->lock). As I explained in another email, and directly to Arjan, this can't happen in reality because of the explicit check at eventpoll.c:592, which does not allow an epoll fd to be dropped inside the same epoll fd. Since lockdep works on per-structure locks, it will never be able to know of policies enforced in other parts of the code.

It was decided some time ago to allow epoll fds to be dropped inside other epoll fds, which triggers very tricky wakeup operations (due to possibly reentrant callback-driven wakeups) handled by the ep_poll_safewake() function.

While looking again at the code, though, I noticed that all the operations done on the epoll's main structure wait queue head (->wq) are already protected by the epoll lock (->lock), so locked-style functions can be used to manipulate the ->wq member. This both saves a lock acquire and makes lockdep happy.

Running totalmess on my dual opteron for a while did not reveal any problem so far:

http://www.xmailserver.org/totalmess.c

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/eventpoll.c 17
1 files changed, 10 insertions, 7 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 08e7e6a555ca..9c677bbd0b08 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c ( Efficent event polling implementation ) 2 * fs/eventpoll.c ( Efficent event polling implementation )
3 * Copyright (C) 2001,...,2003 Davide Libenzi 3 * Copyright (C) 2001,...,2006 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1004 1004
1005 /* Notify waiting tasks that events are available */ 1005 /* Notify waiting tasks that events are available */
1006 if (waitqueue_active(&ep->wq)) 1006 if (waitqueue_active(&ep->wq))
1007 wake_up(&ep->wq); 1007 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
1008 if (waitqueue_active(&ep->poll_wait)) 1008 if (waitqueue_active(&ep->poll_wait))
1009 pwake++; 1009 pwake++;
1010 } 1010 }
@@ -1083,7 +1083,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1083 1083
1084 /* Notify waiting tasks that events are available */ 1084 /* Notify waiting tasks that events are available */
1085 if (waitqueue_active(&ep->wq)) 1085 if (waitqueue_active(&ep->wq))
1086 wake_up(&ep->wq); 1086 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1087 TASK_INTERRUPTIBLE);
1087 if (waitqueue_active(&ep->poll_wait)) 1088 if (waitqueue_active(&ep->poll_wait))
1088 pwake++; 1089 pwake++;
1089 } 1090 }
@@ -1260,7 +1261,8 @@ is_linked:
1260 * wait list. 1261 * wait list.
1261 */ 1262 */
1262 if (waitqueue_active(&ep->wq)) 1263 if (waitqueue_active(&ep->wq))
1263 wake_up(&ep->wq); 1264 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1265 TASK_INTERRUPTIBLE);
1264 if (waitqueue_active(&ep->poll_wait)) 1266 if (waitqueue_active(&ep->poll_wait))
1265 pwake++; 1267 pwake++;
1266 1268
@@ -1444,7 +1446,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1444 * wait list. 1446 * wait list.
1445 */ 1447 */
1446 if (waitqueue_active(&ep->wq)) 1448 if (waitqueue_active(&ep->wq))
1447 wake_up(&ep->wq); 1449 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1450 TASK_INTERRUPTIBLE);
1448 if (waitqueue_active(&ep->poll_wait)) 1451 if (waitqueue_active(&ep->poll_wait))
1449 pwake++; 1452 pwake++;
1450 } 1453 }
@@ -1516,7 +1519,7 @@ retry:
1516 * ep_poll_callback() when events will become available. 1519 * ep_poll_callback() when events will become available.
1517 */ 1520 */
1518 init_waitqueue_entry(&wait, current); 1521 init_waitqueue_entry(&wait, current);
1519 add_wait_queue(&ep->wq, &wait); 1522 __add_wait_queue(&ep->wq, &wait);
1520 1523
1521 for (;;) { 1524 for (;;) {
1522 /* 1525 /*
@@ -1536,7 +1539,7 @@ retry:
1536 jtimeout = schedule_timeout(jtimeout); 1539 jtimeout = schedule_timeout(jtimeout);
1537 write_lock_irqsave(&ep->lock, flags); 1540 write_lock_irqsave(&ep->lock, flags);
1538 } 1541 }
1539 remove_wait_queue(&ep->wq, &wait); 1542 __remove_wait_queue(&ep->wq, &wait);
1540 1543
1541 set_current_state(TASK_RUNNING); 1544 set_current_state(TASK_RUNNING);
1542 } 1545 }