Diffstat (limited to 'net/core/sock.c')
 net/core/sock.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 69 insertions(+), 28 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bbc2cc9..37fe9b6adade 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -307,6 +313,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
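
The skb_dst_force() added above turns a borrowed (noref) destination-cache pointer into a real counted reference before the skb is queued, because the receive queue outlives the RCU read-side section that made the noref pointer safe. A minimal userspace model of that idiom, using C11 atomics (obj_force_ref() and the BORROWED tag bit are illustrative names, not kernel API):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    struct obj {
        atomic_int refcnt;
    };

    #define BORROWED 1UL    /* low pointer bit marks a non-counted reference */

    /* If the holder merely borrowed the object (valid only inside the
     * protected region), take a real reference so the pointer may be
     * queued and used after the region is left. */
    static uintptr_t obj_force_ref(uintptr_t handle)
    {
        if (handle & BORROWED) {
            struct obj *o = (struct obj *)(handle & ~BORROWED);
            atomic_fetch_add(&o->refcnt, 1);
            return (uintptr_t)o;    /* now a counted reference */
        }
        return handle;
    }

    int main(void)
    {
        struct obj o = { .refcnt = 1 };
        uintptr_t h = (uintptr_t)&o | BORROWED;

        h = obj_force_ref(h);       /* safe to stash past the reader section */
        printf("refcnt=%d\n", atomic_load(&((struct obj *)h)->refcnt));
        return 0;
    }
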
@@ -327,6 +338,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
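
The new sk_rcvqueues_full() check drops packets from an overloaded socket before bh_lock_sock() is even attempted, so a flood only costs an atomic increment of sk_drops. A rough userspace model of the early-drop shape (the struct and field names here are invented; the real predicate compares receive-queue plus backlog memory against sk_rcvbuf):

    #include <stdbool.h>
    #include <stdio.h>

    struct fake_sock {
        unsigned int rmem_alloc;    /* bytes in the receive queue */
        unsigned int backlog_len;   /* bytes parked in the backlog */
        unsigned int rcvbuf;        /* configured receive buffer limit */
        unsigned int drops;
    };

    static bool rcvqueues_full(const struct fake_sock *sk, unsigned int len)
    {
        return sk->rmem_alloc + sk->backlog_len + len > sk->rcvbuf;
    }

    static void receive(struct fake_sock *sk, unsigned int len)
    {
        if (rcvqueues_full(sk, len)) {
            sk->drops++;            /* count and discard, no lock taken */
            return;
        }
        sk->backlog_len += len;     /* the packet would be queued here */
    }

    int main(void)
    {
        struct fake_sock sk = { .rmem_alloc = 90, .rcvbuf = 100 };

        receive(&sk, 20);           /* 90 + 0 + 20 > 100: dropped */
        receive(&sk, 10);           /* fits exactly */
        printf("drops=%u backlog=%u\n", sk.drops, sk.backlog_len);
        return 0;
    }
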
@@ -364,11 +379,11 @@ EXPORT_SYMBOL(sk_reset_txq);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst = sk->sk_dst_cache;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 		sk_tx_queue_clear(sk);
-		sk->sk_dst_cache = NULL;
+		rcu_assign_pointer(sk->sk_dst_cache, NULL);
 		dst_release(dst);
 		return NULL;
 	}
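
__sk_dst_check() now loads the cached route through __sk_dst_get() and unpublishes it with rcu_assign_pointer(), so lockless readers observe either the old, still-valid dst or NULL, never a freed pointer. A sketch of the same unpublish-then-release sequence with userspace RCU (liburcu), assuming the library is installed and a single updater, the role the socket lock plays in the kernel; build with gcc demo.c -lurcu:

    #define _LGPL_SOURCE
    #include <urcu.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct dst { int obsolete; };

    static struct dst *cache;   /* RCU-published, like sk->sk_dst_cache */

    /* Single-updater check: unpublish a stale entry, wait for readers,
     * then drop the reference (free() models dst_release()). */
    static struct dst *check_cache(void)
    {
        struct dst *d;

        rcu_read_lock();
        d = rcu_dereference(cache);
        rcu_read_unlock();

        if (d && d->obsolete) {
            rcu_assign_pointer(cache, NULL);  /* unpublish first */
            synchronize_rcu();                /* lockless readers drain */
            free(d);
            return NULL;
        }
        return d;
    }

    int main(void)
    {
        struct dst *d = malloc(sizeof(*d));

        if (!d)
            return 1;
        d->obsolete = 1;

        rcu_register_thread();
        rcu_assign_pointer(cache, d);         /* publish */
        printf("cache %s\n", check_cache() ? "valid" : "invalidated");
        rcu_unregister_thread();
        return 0;
    }
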
@@ -1041,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid = task_cls_classid(current);
+
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
 /**
  * sk_alloc - All socket objects are allocated here
  * @net: the applicable net namespace
@@ -1064,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
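
sock_update_classid() copies the current task's cgroup classid onto the newly allocated socket, and only stores when the value actually changed, avoiding needless dirtying of the sock's cache line on hot paths. A small model of that compare-before-store idiom (get_task_classid() is a stand-in for task_cls_classid(current)):

    #include <stdint.h>
    #include <stdio.h>

    struct fake_sock { uint32_t classid; };

    static uint32_t get_task_classid(void)
    {
        return 42;  /* stand-in for the calling task's cgroup classid */
    }

    static void update_classid(struct fake_sock *sk)
    {
        uint32_t classid = get_task_classid();

        /* zero means "no class configured"; equal means nothing to do */
        if (classid && classid != sk->classid)
            sk->classid = classid;
    }

    int main(void)
    {
        struct fake_sock sk = { 0 };

        update_classid(&sk);
        update_classid(&sk);    /* second call stores nothing */
        printf("classid=%u\n", sk.classid);
        return 0;
    }
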
@@ -1157,7 +1185,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 	skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-	rwlock_init(&newsk->sk_dst_lock);
+	spin_lock_init(&newsk->sk_dst_lock);
 	rwlock_init(&newsk->sk_callback_lock);
 	lockdep_set_class_and_name(&newsk->sk_callback_lock,
 			af_callback_keys + newsk->sk_family,
@@ -1207,7 +1235,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 	 */
 	sk_refcnt_debug_inc(newsk);
 	sk_set_socket(newsk, NULL);
-	newsk->sk_sleep = NULL;
+	newsk->sk_wq = NULL;
 
 	if (newsk->sk_prot->sockets_allocated)
 		percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1227,6 +1255,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1395,7 +1424,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1404,7 +1433,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		timeo = schedule_timeout(timeo);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
@@ -1531,6 +1560,7 @@ static void __release_sock(struct sock *sk)
 	do {
 		struct sk_buff *next = skb->next;
 
+		WARN_ON_ONCE(skb_dst_is_noref(skb));
 		skb->next = NULL;
 		sk_backlog_rcv(sk, skb);
 
@@ -1570,11 +1600,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
 	int rc;
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
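
Every open-coded sk->sk_sleep dereference becomes the sk_sleep(sk) accessor, so only one helper has to know that the wait queue now lives in the RCU-managed struct socket_wq. A userspace sketch of the same wait-loop shape with pthreads (all names are illustrative; link with -lpthread):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    struct fake_sock {
        pthread_mutex_t lock;
        pthread_cond_t wait;
        bool data_ready;
    };

    /* accessor in the spirit of sk_sleep(): one place knows where the
     * wait queue lives */
    static struct fake_sock *sock_sleep(struct fake_sock *sk)
    {
        return sk;
    }

    static bool wait_data(struct fake_sock *sk, int timeout_sec)
    {
        struct fake_sock *wq = sock_sleep(sk);
        struct timespec ts;
        bool ok;

        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_sec += timeout_sec;

        pthread_mutex_lock(&wq->lock);
        while (!wq->data_ready) {
            if (pthread_cond_timedwait(&wq->wait, &wq->lock, &ts))
                break;              /* timed out */
        }
        ok = wq->data_ready;
        pthread_mutex_unlock(&wq->lock);
        return ok;
    }

    int main(void)
    {
        struct fake_sock sk = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, true
        };

        printf("data: %s\n", wait_data(&sk, 1) ? "yes" : "timeout");
        return 0;
    }
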
@@ -1796,41 +1826,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk->sk_sleep);
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
@@ -1838,7 +1880,7 @@ static void sock_def_write_space(struct sock *sk)
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
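
All four default callbacks now bracket their work with rcu_read_lock()/rcu_read_unlock() instead of sk_callback_lock, so wakeups stop contending on a rwlock and the wait-queue structure can only be freed after a grace period. A liburcu sketch of the converted wakeup shape (the lockless sleepers field stands in for wq_has_sleeper(); build with gcc demo.c -lurcu -lpthread):

    #define _LGPL_SOURCE
    #include <urcu.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct socket_wq {                /* modeled on the new struct socket_wq */
        pthread_mutex_t lock;
        pthread_cond_t wait;
        int sleepers;
    };

    static struct socket_wq *sk_wq;   /* RCU-published, like sk->sk_wq */

    static void def_wakeup(void)
    {
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk_wq);
        if (wq && wq->sleepers) {     /* lockless check, wq may be gone */
            pthread_mutex_lock(&wq->lock);
            pthread_cond_broadcast(&wq->wait);
            pthread_mutex_unlock(&wq->lock);
        }
        rcu_read_unlock();
    }

    int main(void)
    {
        struct socket_wq *wq = calloc(1, sizeof(*wq));

        if (!wq)
            return 1;
        pthread_mutex_init(&wq->lock, NULL);
        pthread_cond_init(&wq->wait, NULL);

        rcu_register_thread();
        rcu_assign_pointer(sk_wq, wq);    /* publish */
        def_wakeup();                     /* no sleepers: cheap no-op */

        rcu_assign_pointer(sk_wq, NULL);  /* unpublish */
        synchronize_rcu();                /* readers are finished */
        free(wq);
        rcu_unregister_thread();
        puts("done");
        return 0;
    }
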
@@ -1885,7 +1927,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
-	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
@@ -1893,12 +1934,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_sleep	=	&sock->wait;
+		sk->sk_wq	=	sock->wq;
 		sock->sk	=	sk;
 	} else
-		sk->sk_sleep	=	NULL;
+		sk->sk_wq	=	NULL;
 
-	rwlock_init(&sk->sk_dst_lock);
+	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,