about | summary | refs | log | tree | commit | diff | stats
path: root/net/core
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-04-29 07:01:49 -0400
committer: David S. Miller <davem@davemloft.net> 2010-05-01 18:00:15 -0400
commit: 43815482370c510c569fd18edb57afcb0fa8cab6 (patch)
tree: 063efaae3758402b84f056438b704d1de68f7837 /net/core
parent: 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff)
net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we need two atomic operations (and the associated cache-line dirtying) per incoming packet. An RCU conversion is pretty much needed:

1) Add a new structure, called "struct socket_wq", to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [A future patch will add a list anchor for wakeup coalescing.]
2) Attach one such structure to each "struct socket" created in sock_alloc_inode().
3) Respect the RCU grace period when freeing a "struct socket_wq".
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to "struct socket_wq".
5) Change the sk_sleep() function to use the new sk->sk_wq instead of sk->sk_sleep.
6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside an rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to:
   - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock).
   - Use wq_has_sleeper() to wake up tasks if any are sleeping.
   - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock).
8) sock_wake_async() is modified to use RCU protection as well.
9) Exceptions: macvtap, drivers/net/tun.c and af_unix use an integrated "struct socket_wq" instead of dynamically allocated ones. They don't need RCU freeing.

Some cleanups or follow-ups are probably needed (a possible sk_callback_lock conversion to a spinlock, for example).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/sock.c 50
-rw-r--r-- net/core/stream.c 10
2 files changed, 38 insertions, 22 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 51041759517..94c4affdda9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1211 */ 1211 */
1212 sk_refcnt_debug_inc(newsk); 1212 sk_refcnt_debug_inc(newsk);
1213 sk_set_socket(newsk, NULL); 1213 sk_set_socket(newsk, NULL);
1214 newsk->sk_sleep = NULL; 1214 newsk->sk_wq = NULL;
1215 1215
1216 if (newsk->sk_prot->sockets_allocated) 1216 if (newsk->sk_prot->sockets_allocated)
1217 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1217 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
1800 1800
1801static void sock_def_wakeup(struct sock *sk) 1801static void sock_def_wakeup(struct sock *sk)
1802{ 1802{
1803 read_lock(&sk->sk_callback_lock); 1803 struct socket_wq *wq;
1804 if (sk_has_sleeper(sk)) 1804
1805 wake_up_interruptible_all(sk_sleep(sk)); 1805 rcu_read_lock();
1806 read_unlock(&sk->sk_callback_lock); 1806 wq = rcu_dereference(sk->sk_wq);
1807 if (wq_has_sleeper(wq))
1808 wake_up_interruptible_all(&wq->wait);
1809 rcu_read_unlock();
1807} 1810}
1808 1811
1809static void sock_def_error_report(struct sock *sk) 1812static void sock_def_error_report(struct sock *sk)
1810{ 1813{
1811 read_lock(&sk->sk_callback_lock); 1814 struct socket_wq *wq;
1812 if (sk_has_sleeper(sk)) 1815
1813 wake_up_interruptible_poll(sk_sleep(sk), POLLERR); 1816 rcu_read_lock();
1817 wq = rcu_dereference(sk->sk_wq);
1818 if (wq_has_sleeper(wq))
1819 wake_up_interruptible_poll(&wq->wait, POLLERR);
1814 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1820 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1815 read_unlock(&sk->sk_callback_lock); 1821 rcu_read_unlock();
1816} 1822}
1817 1823
1818static void sock_def_readable(struct sock *sk, int len) 1824static void sock_def_readable(struct sock *sk, int len)
1819{ 1825{
1820 read_lock(&sk->sk_callback_lock); 1826 struct socket_wq *wq;
1821 if (sk_has_sleeper(sk)) 1827
1822 wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | 1828 rcu_read_lock();
1829 wq = rcu_dereference(sk->sk_wq);
1830 if (wq_has_sleeper(wq))
1831 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
1823 POLLRDNORM | POLLRDBAND); 1832 POLLRDNORM | POLLRDBAND);
1824 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1833 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1825 read_unlock(&sk->sk_callback_lock); 1834 rcu_read_unlock();
1826} 1835}
1827 1836
1828static void sock_def_write_space(struct sock *sk) 1837static void sock_def_write_space(struct sock *sk)
1829{ 1838{
1830 read_lock(&sk->sk_callback_lock); 1839 struct socket_wq *wq;
1840
1841 rcu_read_lock();
1831 1842
1832 /* Do not wake up a writer until he can make "significant" 1843 /* Do not wake up a writer until he can make "significant"
1833 * progress. --DaveM 1844 * progress. --DaveM
1834 */ 1845 */
1835 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1846 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1836 if (sk_has_sleeper(sk)) 1847 wq = rcu_dereference(sk->sk_wq);
1837 wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | 1848 if (wq_has_sleeper(wq))
1849 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1838 POLLWRNORM | POLLWRBAND); 1850 POLLWRNORM | POLLWRBAND);
1839 1851
1840 /* Should agree with poll, otherwise some programs break */ 1852 /* Should agree with poll, otherwise some programs break */
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
1842 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 1854 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1843 } 1855 }
1844 1856
1845 read_unlock(&sk->sk_callback_lock); 1857 rcu_read_unlock();
1846} 1858}
1847 1859
1848static void sock_def_destruct(struct sock *sk) 1860static void sock_def_destruct(struct sock *sk)
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1896 1908
1897 if (sock) { 1909 if (sock) {
1898 sk->sk_type = sock->type; 1910 sk->sk_type = sock->type;
1899 sk->sk_sleep = &sock->wait; 1911 sk->sk_wq = sock->wq;
1900 sock->sk = sk; 1912 sock->sk = sk;
1901 } else 1913 } else
1902 sk->sk_sleep = NULL; 1914 sk->sk_wq = NULL;
1903 1915
1904 spin_lock_init(&sk->sk_dst_lock); 1916 spin_lock_init(&sk->sk_dst_lock);
1905 rwlock_init(&sk->sk_callback_lock); 1917 rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index 7b3c3f30b10..cc196f42b8d 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
28void sk_stream_write_space(struct sock *sk) 28void sk_stream_write_space(struct sock *sk)
29{ 29{
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq;
31 32
32 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
34 35
35 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 36 rcu_read_lock();
36 wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | 37 wq = rcu_dereference(sk->sk_wq);
38 if (wq_has_sleeper(wq))
39 wake_up_interruptible_poll(&wq->wait, POLLOUT |
37 POLLWRNORM | POLLWRBAND); 40 POLLWRNORM | POLLWRBAND);
38 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 41 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
39 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); 42 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
43 rcu_read_unlock();
40 } 44 }
41} 45}
42 46