Diffstat (limited to 'net/core/sock.c')
-rw-r--r--	net/core/sock.c	97
1 file changed, 69 insertions(+), 28 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bbc2cc9..37fe9b6adade 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -307,6 +313,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
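
Note: the skb_dst_force() added above pins the route before the skb is queued, because the receive queue outlives the rcu_read_lock() section the packet arrived under. A simplified sketch of the helper, assumed close to (but not quoted from) the include/linux/skbuff.h definition of this series:

	/* Sketch: if the skb holds a dst that is only valid under
	 * rcu_read_lock() (SKB_DST_NOREF), take a real reference now so
	 * the dst stays usable after the RCU section ends.
	 */
	static inline void skb_dst_force(struct sk_buff *skb)
	{
		if (skb_dst_is_noref(skb)) {
			WARN_ON(!rcu_read_lock_held());
			skb->_skb_refdst &= ~SKB_DST_NOREF;
			dst_clone(skb_dst(skb));
		}
	}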
@@ -327,6 +338,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
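
Note: this early-drop check replaces the fixed sk_backlog.limit that this same patch removes from sock_init_data() further down; backlog and receive-queue memory are now charged against sk_rcvbuf together. A sketch of the predicate, assuming the include/net/sock.h definition of this era:

	/* Sketch: drop before locking when queued receive memory plus the
	 * backlog already exceeds the receive buffer budget.
	 */
	static inline bool sk_rcvqueues_full(const struct sock *sk,
					     const struct sk_buff *skb)
	{
		unsigned int qsize = sk->sk_backlog.len +
				     atomic_read(&sk->sk_rmem_alloc);

		return qsize + skb->truesize > sk->sk_rcvbuf;
	}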
@@ -364,11 +379,11 @@ EXPORT_SYMBOL(sk_reset_txq);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst = sk->sk_dst_cache;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 		sk_tx_queue_clear(sk);
-		sk->sk_dst_cache = NULL;
+		rcu_assign_pointer(sk->sk_dst_cache, NULL);
 		dst_release(dst);
 		return NULL;
 	}
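
Note: sk_dst_cache becomes an RCU-protected pointer, so the lockless read goes through __sk_dst_get() and the NULL store is published with rcu_assign_pointer(). A sketch of the read-side accessor (assumption: simplified; the real lockdep condition may list additional lock holders):

	/* Sketch: fetch the cached route under RCU or with the socket
	 * owned, satisfying the lockdep check either way.
	 */
	static inline struct dst_entry *__sk_dst_get(struct sock *sk)
	{
		return rcu_dereference_check(sk->sk_dst_cache,
					     rcu_read_lock_held() ||
					     sock_owned_by_user(sk));
	}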
@@ -1041,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid = task_cls_classid(current);
+
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+EXPORT_SYMBOL(sock_update_classid);
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
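
Note: sock_update_classid() snapshots the creating task's net_cls cgroup classid into sk->sk_classid, so transmit-path code can classify traffic later, in contexts where `current` is meaningless. The reader below is purely hypothetical, for illustration of the intended use:

	/* Hypothetical consumer (not in this patch): a classifier can pick
	 * up the classid cached on the socket at transmit time.
	 */
	static u32 skb_cls_classid(const struct sk_buff *skb)
	{
		return skb->sk ? skb->sk->sk_classid : 0;
	}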
@@ -1064,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
@@ -1157,7 +1185,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-		rwlock_init(&newsk->sk_dst_lock);
+		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
 				af_callback_keys + newsk->sk_family,
@@ -1207,7 +1235,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		 */
 		sk_refcnt_debug_inc(newsk);
 		sk_set_socket(newsk, NULL);
-		newsk->sk_sleep = NULL;
+		newsk->sk_wq = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
 			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1227,6 +1255,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
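
Note: sk_route_nocaps is a per-socket veto mask applied after the device features are copied into sk_route_caps. A hypothetical caller, only to show the intended use:

	/* Hypothetical example (not in this patch): force software GSO for
	 * one socket by masking the offload bits before routes are set up.
	 */
	static void sock_forbid_gso(struct sock *sk)
	{
		sk->sk_route_nocaps |= NETIF_F_GSO_MASK;
	}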
@@ -1395,7 +1424,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1404,7 +1433,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		timeo = schedule_timeout(timeo);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
@@ -1531,6 +1560,7 @@ static void __release_sock(struct sock *sk)
 		do {
 			struct sk_buff *next = skb->next;
 
+			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
 
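
Note: by the time backlogged skbs are replayed here, the RCU section they arrived under is long gone, so a noref dst at this point would be a bug; queuers are expected to have called skb_dst_force() first, as in the sock_queue_rcv_skb() hunk above. Sketch of the test, assumed to match include/linux/skbuff.h of this series:

	/* Sketch: a low bit of skb->_skb_refdst flags a dst that is valid
	 * only inside the rcu_read_lock() section it was taken in.
	 */
	static inline bool skb_dst_is_noref(const struct sk_buff *skb)
	{
		return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
	}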
@@ -1570,11 +1600,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
 	int rc;
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
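
Note: with the sk_sleep field gone from struct sock, sleepers reach the wait queue through the sk_sleep() accessor. A sketch, assuming the definition introduced alongside this patch:

	/* Sketch: the wait queue now lives in the RCU-managed socket_wq
	 * hanging off the socket.
	 */
	static inline wait_queue_head_t *sk_sleep(struct sock *sk)
	{
		return &sk->sk_wq->wait;
	}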
@@ -1796,41 +1826,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk->sk_sleep);
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
@@ -1838,7 +1880,7 @@ static void sock_def_write_space(struct sock *sk)
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
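
Note: the four sock_def_* callbacks trade sk_callback_lock for rcu_read_lock() plus the RCU-managed struct socket_wq, so wakeups no longer contend on a rwlock. A sketch of the structure and the sleeper test, assumed close to the include/net/sock.h definitions of this series:

	struct socket_wq {
		wait_queue_head_t	wait;
		struct fasync_struct	*fasync_list;
		struct rcu_head		rcu;
	};

	/* Sketch: the barrier pairs with one on the waiter side, so either
	 * the waker sees the waiter on the queue or the waiter sees the
	 * new state it is about to test.
	 */
	static inline int wq_has_sleeper(struct socket_wq *wq)
	{
		smp_mb();
		return wq && waitqueue_active(&wq->wait);
	}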
@@ -1885,7 +1927,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation = GFP_KERNEL;
 	sk->sk_rcvbuf = sysctl_rmem_default;
 	sk->sk_sndbuf = sysctl_wmem_default;
-	sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
 	sk->sk_state = TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
@@ -1893,12 +1934,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type = sock->type;
-		sk->sk_sleep = &sock->wait;
+		sk->sk_wq = sock->wq;
 		sock->sk = sk;
 	} else
-		sk->sk_sleep = NULL;
+		sk->sk_wq = NULL;
 
-	rwlock_init(&sk->sk_dst_lock);
+	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
