diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 07:01:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 18:00:15 -0400 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /net/core | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed :
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
"struct socket_wq"
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to wake up tasks, if any are sleeping.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/sock.c | 50 | ||||
-rw-r--r-- | net/core/stream.c | 10 |
2 files changed, 38 insertions, 22 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 51041759517..94c4affdda9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1211 | */ | 1211 | */ |
1212 | sk_refcnt_debug_inc(newsk); | 1212 | sk_refcnt_debug_inc(newsk); |
1213 | sk_set_socket(newsk, NULL); | 1213 | sk_set_socket(newsk, NULL); |
1214 | newsk->sk_sleep = NULL; | 1214 | newsk->sk_wq = NULL; |
1215 | 1215 | ||
1216 | if (newsk->sk_prot->sockets_allocated) | 1216 | if (newsk->sk_prot->sockets_allocated) |
1217 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1217 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); |
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage); | |||
1800 | 1800 | ||
1801 | static void sock_def_wakeup(struct sock *sk) | 1801 | static void sock_def_wakeup(struct sock *sk) |
1802 | { | 1802 | { |
1803 | read_lock(&sk->sk_callback_lock); | 1803 | struct socket_wq *wq; |
1804 | if (sk_has_sleeper(sk)) | 1804 | |
1805 | wake_up_interruptible_all(sk_sleep(sk)); | 1805 | rcu_read_lock(); |
1806 | read_unlock(&sk->sk_callback_lock); | 1806 | wq = rcu_dereference(sk->sk_wq); |
1807 | if (wq_has_sleeper(wq)) | ||
1808 | wake_up_interruptible_all(&wq->wait); | ||
1809 | rcu_read_unlock(); | ||
1807 | } | 1810 | } |
1808 | 1811 | ||
1809 | static void sock_def_error_report(struct sock *sk) | 1812 | static void sock_def_error_report(struct sock *sk) |
1810 | { | 1813 | { |
1811 | read_lock(&sk->sk_callback_lock); | 1814 | struct socket_wq *wq; |
1812 | if (sk_has_sleeper(sk)) | 1815 | |
1813 | wake_up_interruptible_poll(sk_sleep(sk), POLLERR); | 1816 | rcu_read_lock(); |
1817 | wq = rcu_dereference(sk->sk_wq); | ||
1818 | if (wq_has_sleeper(wq)) | ||
1819 | wake_up_interruptible_poll(&wq->wait, POLLERR); | ||
1814 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); | 1820 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); |
1815 | read_unlock(&sk->sk_callback_lock); | 1821 | rcu_read_unlock(); |
1816 | } | 1822 | } |
1817 | 1823 | ||
1818 | static void sock_def_readable(struct sock *sk, int len) | 1824 | static void sock_def_readable(struct sock *sk, int len) |
1819 | { | 1825 | { |
1820 | read_lock(&sk->sk_callback_lock); | 1826 | struct socket_wq *wq; |
1821 | if (sk_has_sleeper(sk)) | 1827 | |
1822 | wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | | 1828 | rcu_read_lock(); |
1829 | wq = rcu_dereference(sk->sk_wq); | ||
1830 | if (wq_has_sleeper(wq)) | ||
1831 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | ||
1823 | POLLRDNORM | POLLRDBAND); | 1832 | POLLRDNORM | POLLRDBAND); |
1824 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 1833 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
1825 | read_unlock(&sk->sk_callback_lock); | 1834 | rcu_read_unlock(); |
1826 | } | 1835 | } |
1827 | 1836 | ||
1828 | static void sock_def_write_space(struct sock *sk) | 1837 | static void sock_def_write_space(struct sock *sk) |
1829 | { | 1838 | { |
1830 | read_lock(&sk->sk_callback_lock); | 1839 | struct socket_wq *wq; |
1840 | |||
1841 | rcu_read_lock(); | ||
1831 | 1842 | ||
1832 | /* Do not wake up a writer until he can make "significant" | 1843 | /* Do not wake up a writer until he can make "significant" |
1833 | * progress. --DaveM | 1844 | * progress. --DaveM |
1834 | */ | 1845 | */ |
1835 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | 1846 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
1836 | if (sk_has_sleeper(sk)) | 1847 | wq = rcu_dereference(sk->sk_wq); |
1837 | wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | | 1848 | if (wq_has_sleeper(wq)) |
1849 | wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | ||
1838 | POLLWRNORM | POLLWRBAND); | 1850 | POLLWRNORM | POLLWRBAND); |
1839 | 1851 | ||
1840 | /* Should agree with poll, otherwise some programs break */ | 1852 | /* Should agree with poll, otherwise some programs break */ |
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk) | |||
1842 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 1854 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
1843 | } | 1855 | } |
1844 | 1856 | ||
1845 | read_unlock(&sk->sk_callback_lock); | 1857 | rcu_read_unlock(); |
1846 | } | 1858 | } |
1847 | 1859 | ||
1848 | static void sock_def_destruct(struct sock *sk) | 1860 | static void sock_def_destruct(struct sock *sk) |
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1896 | 1908 | ||
1897 | if (sock) { | 1909 | if (sock) { |
1898 | sk->sk_type = sock->type; | 1910 | sk->sk_type = sock->type; |
1899 | sk->sk_sleep = &sock->wait; | 1911 | sk->sk_wq = sock->wq; |
1900 | sock->sk = sk; | 1912 | sock->sk = sk; |
1901 | } else | 1913 | } else |
1902 | sk->sk_sleep = NULL; | 1914 | sk->sk_wq = NULL; |
1903 | 1915 | ||
1904 | spin_lock_init(&sk->sk_dst_lock); | 1916 | spin_lock_init(&sk->sk_dst_lock); |
1905 | rwlock_init(&sk->sk_callback_lock); | 1917 | rwlock_init(&sk->sk_callback_lock); |
diff --git a/net/core/stream.c b/net/core/stream.c index 7b3c3f30b10..cc196f42b8d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -28,15 +28,19 @@ | |||
28 | void sk_stream_write_space(struct sock *sk) | 28 | void sk_stream_write_space(struct sock *sk) |
29 | { | 29 | { |
30 | struct socket *sock = sk->sk_socket; | 30 | struct socket *sock = sk->sk_socket; |
31 | struct socket_wq *wq; | ||
31 | 32 | ||
32 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { | 33 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { |
33 | clear_bit(SOCK_NOSPACE, &sock->flags); | 34 | clear_bit(SOCK_NOSPACE, &sock->flags); |
34 | 35 | ||
35 | if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) | 36 | rcu_read_lock(); |
36 | wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | | 37 | wq = rcu_dereference(sk->sk_wq); |
38 | if (wq_has_sleeper(wq)) | ||
39 | wake_up_interruptible_poll(&wq->wait, POLLOUT | | ||
37 | POLLWRNORM | POLLWRBAND); | 40 | POLLWRNORM | POLLWRBAND); |
38 | if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | 41 | if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) |
39 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); | 42 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); |
43 | rcu_read_unlock(); | ||
40 | } | 44 | } |
41 | } | 45 | } |
42 | 46 | ||