aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/af_inet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r--net/ipv4/af_inet.c187
1 files changed, 134 insertions, 53 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7d12c6a9b19b..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -86,6 +86,7 @@
86#include <linux/poll.h> 86#include <linux/poll.h>
87#include <linux/netfilter_ipv4.h> 87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h> 88#include <linux/random.h>
89#include <linux/slab.h>
89 90
90#include <asm/uaccess.h> 91#include <asm/uaccess.h>
91#include <asm/system.h> 92#include <asm/system.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_forward_alloc); 154 WARN_ON(sk->sk_forward_alloc);
154 155
155 kfree(inet->opt); 156 kfree(inet->opt);
156 dst_release(sk->sk_dst_cache); 157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
157 sk_refcnt_debug_dec(sk); 158 sk_refcnt_debug_dec(sk);
158} 159}
159EXPORT_SYMBOL(inet_sock_destruct); 160EXPORT_SYMBOL(inet_sock_destruct);
@@ -354,6 +355,8 @@ lookup_protocol:
354 inet = inet_sk(sk); 355 inet = inet_sk(sk);
355 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
356 357
358 inet->nodefrag = 0;
359
357 if (SOCK_RAW == sock->type) { 360 if (SOCK_RAW == sock->type) {
358 inet->inet_num = protocol; 361 inet->inet_num = protocol;
359 if (IPPROTO_RAW == protocol) 362 if (IPPROTO_RAW == protocol)
@@ -418,6 +421,8 @@ int inet_release(struct socket *sock)
418 if (sk) { 421 if (sk) {
419 long timeout; 422 long timeout;
420 423
424 sock_rps_reset_flow(sk);
425
421 /* Applications forget to leave groups before exiting */ 426 /* Applications forget to leave groups before exiting */
422 ip_mc_drop_socket(sk); 427 ip_mc_drop_socket(sk);
423 428
@@ -530,6 +535,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
530{ 535{
531 struct sock *sk = sock->sk; 536 struct sock *sk = sock->sk;
532 537
538 if (addr_len < sizeof(uaddr->sa_family))
539 return -EINVAL;
533 if (uaddr->sa_family == AF_UNSPEC) 540 if (uaddr->sa_family == AF_UNSPEC)
534 return sk->sk_prot->disconnect(sk, flags); 541 return sk->sk_prot->disconnect(sk, flags);
535 542
@@ -543,7 +550,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
543{ 550{
544 DEFINE_WAIT(wait); 551 DEFINE_WAIT(wait);
545 552
546 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 553 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
547 554
548 /* Basic assumption: if someone sets sk->sk_err, he _must_ 555 /* Basic assumption: if someone sets sk->sk_err, he _must_
549 * change state of the socket from TCP_SYN_*. 556 * change state of the socket from TCP_SYN_*.
@@ -556,9 +563,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
556 lock_sock(sk); 563 lock_sock(sk);
557 if (signal_pending(current) || !timeo) 564 if (signal_pending(current) || !timeo)
558 break; 565 break;
559 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 566 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
560 } 567 }
561 finish_wait(sk->sk_sleep, &wait); 568 finish_wait(sk_sleep(sk), &wait);
562 return timeo; 569 return timeo;
563} 570}
564 571
@@ -573,6 +580,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
573 int err; 580 int err;
574 long timeo; 581 long timeo;
575 582
583 if (addr_len < sizeof(uaddr->sa_family))
584 return -EINVAL;
585
576 lock_sock(sk); 586 lock_sock(sk);
577 587
578 if (uaddr->sa_family == AF_UNSPEC) { 588 if (uaddr->sa_family == AF_UNSPEC) {
@@ -714,29 +724,51 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
714{ 724{
715 struct sock *sk = sock->sk; 725 struct sock *sk = sock->sk;
716 726
727 sock_rps_record_flow(sk);
728
717 /* We may need to bind the socket. */ 729 /* We may need to bind the socket. */
718 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 730 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
731 inet_autobind(sk))
719 return -EAGAIN; 732 return -EAGAIN;
720 733
721 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 734 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
722} 735}
723EXPORT_SYMBOL(inet_sendmsg); 736EXPORT_SYMBOL(inet_sendmsg);
724 737
725 738ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
726static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 739 size_t size, int flags)
727 size_t size, int flags)
728{ 740{
729 struct sock *sk = sock->sk; 741 struct sock *sk = sock->sk;
730 742
743 sock_rps_record_flow(sk);
744
731 /* We may need to bind the socket. */ 745 /* We may need to bind the socket. */
732 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 746 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
747 inet_autobind(sk))
733 return -EAGAIN; 748 return -EAGAIN;
734 749
735 if (sk->sk_prot->sendpage) 750 if (sk->sk_prot->sendpage)
736 return sk->sk_prot->sendpage(sk, page, offset, size, flags); 751 return sk->sk_prot->sendpage(sk, page, offset, size, flags);
737 return sock_no_sendpage(sock, page, offset, size, flags); 752 return sock_no_sendpage(sock, page, offset, size, flags);
738} 753}
754EXPORT_SYMBOL(inet_sendpage);
739 755
756int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
757 size_t size, int flags)
758{
759 struct sock *sk = sock->sk;
760 int addr_len = 0;
761 int err;
762
763 sock_rps_record_flow(sk);
764
765 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
766 flags & ~MSG_DONTWAIT, &addr_len);
767 if (err >= 0)
768 msg->msg_namelen = addr_len;
769 return err;
770}
771EXPORT_SYMBOL(inet_recvmsg);
740 772
741int inet_shutdown(struct socket *sock, int how) 773int inet_shutdown(struct socket *sock, int how)
742{ 774{
@@ -865,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
865 .shutdown = inet_shutdown, 897 .shutdown = inet_shutdown,
866 .setsockopt = sock_common_setsockopt, 898 .setsockopt = sock_common_setsockopt,
867 .getsockopt = sock_common_getsockopt, 899 .getsockopt = sock_common_getsockopt,
868 .sendmsg = tcp_sendmsg, 900 .sendmsg = inet_sendmsg,
869 .recvmsg = sock_common_recvmsg, 901 .recvmsg = inet_recvmsg,
870 .mmap = sock_no_mmap, 902 .mmap = sock_no_mmap,
871 .sendpage = tcp_sendpage, 903 .sendpage = inet_sendpage,
872 .splice_read = tcp_splice_read, 904 .splice_read = tcp_splice_read,
873#ifdef CONFIG_COMPAT 905#ifdef CONFIG_COMPAT
874 .compat_setsockopt = compat_sock_common_setsockopt, 906 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -893,7 +925,7 @@ const struct proto_ops inet_dgram_ops = {
893 .setsockopt = sock_common_setsockopt, 925 .setsockopt = sock_common_setsockopt,
894 .getsockopt = sock_common_getsockopt, 926 .getsockopt = sock_common_getsockopt,
895 .sendmsg = inet_sendmsg, 927 .sendmsg = inet_sendmsg,
896 .recvmsg = sock_common_recvmsg, 928 .recvmsg = inet_recvmsg,
897 .mmap = sock_no_mmap, 929 .mmap = sock_no_mmap,
898 .sendpage = inet_sendpage, 930 .sendpage = inet_sendpage,
899#ifdef CONFIG_COMPAT 931#ifdef CONFIG_COMPAT
@@ -923,7 +955,7 @@ static const struct proto_ops inet_sockraw_ops = {
923 .setsockopt = sock_common_setsockopt, 955 .setsockopt = sock_common_setsockopt,
924 .getsockopt = sock_common_getsockopt, 956 .getsockopt = sock_common_getsockopt,
925 .sendmsg = inet_sendmsg, 957 .sendmsg = inet_sendmsg,
926 .recvmsg = sock_common_recvmsg, 958 .recvmsg = inet_recvmsg,
927 .mmap = sock_no_mmap, 959 .mmap = sock_no_mmap,
928 .sendpage = inet_sendpage, 960 .sendpage = inet_sendpage,
929#ifdef CONFIG_COMPAT 961#ifdef CONFIG_COMPAT
@@ -1073,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1073 if (err) 1105 if (err)
1074 return err; 1106 return err;
1075 1107
1076 sk_setup_caps(sk, &rt->u.dst); 1108 sk_setup_caps(sk, &rt->dst);
1077 1109
1078 new_saddr = rt->rt_src; 1110 new_saddr = rt->rt_src;
1079 1111
@@ -1139,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1139 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); 1171 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1140} 1172}
1141 if (!err) 1173 if (!err)
1142 sk_setup_caps(sk, &rt->u.dst); 1174 sk_setup_caps(sk, &rt->dst);
1143 else { 1175 else {
1144 /* Routing failed... */ 1176 /* Routing failed... */
1145 sk->sk_route_caps = 0; 1177 sk->sk_route_caps = 0;
@@ -1296,8 +1328,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1296 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 1328 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1297 goto out_unlock; 1329 goto out_unlock;
1298 1330
1299 id = ntohl(*(u32 *)&iph->id); 1331 id = ntohl(*(__be32 *)&iph->id);
1300 flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); 1332 flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
1301 id >>= 16; 1333 id >>= 16;
1302 1334
1303 for (p = *head; p; p = p->next) { 1335 for (p = *head; p; p = p->next) {
@@ -1310,8 +1342,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1310 1342
1311 if ((iph->protocol ^ iph2->protocol) | 1343 if ((iph->protocol ^ iph2->protocol) |
1312 (iph->tos ^ iph2->tos) | 1344 (iph->tos ^ iph2->tos) |
1313 (iph->saddr ^ iph2->saddr) | 1345 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
1314 (iph->daddr ^ iph2->daddr)) { 1346 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
1315 NAPI_GRO_CB(p)->same_flow = 0; 1347 NAPI_GRO_CB(p)->same_flow = 0;
1316 continue; 1348 continue;
1317 } 1349 }
@@ -1385,7 +1417,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1385} 1417}
1386EXPORT_SYMBOL_GPL(inet_ctl_sock_create); 1418EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1387 1419
1388unsigned long snmp_fold_field(void *mib[], int offt) 1420unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1389{ 1421{
1390 unsigned long res = 0; 1422 unsigned long res = 0;
1391 int i; 1423 int i;
@@ -1398,13 +1430,49 @@ unsigned long snmp_fold_field(void *mib[], int offt)
1398} 1430}
1399EXPORT_SYMBOL_GPL(snmp_fold_field); 1431EXPORT_SYMBOL_GPL(snmp_fold_field);
1400 1432
1401int snmp_mib_init(void *ptr[2], size_t mibsize) 1433#if BITS_PER_LONG==32
1434
1435u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
1436{
1437 u64 res = 0;
1438 int cpu;
1439
1440 for_each_possible_cpu(cpu) {
1441 void *bhptr, *userptr;
1442 struct u64_stats_sync *syncp;
1443 u64 v_bh, v_user;
1444 unsigned int start;
1445
1446 /* first mib used by softirq context, we must use _bh() accessors */
1447 bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
1448 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1449 do {
1450 start = u64_stats_fetch_begin_bh(syncp);
1451 v_bh = *(((u64 *) bhptr) + offt);
1452 } while (u64_stats_fetch_retry_bh(syncp, start));
1453
1454 /* second mib used in USER context */
1455 userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
1456 syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
1457 do {
1458 start = u64_stats_fetch_begin(syncp);
1459 v_user = *(((u64 *) userptr) + offt);
1460 } while (u64_stats_fetch_retry(syncp, start));
1461
1462 res += v_bh + v_user;
1463 }
1464 return res;
1465}
1466EXPORT_SYMBOL_GPL(snmp_fold_field64);
1467#endif
1468
1469int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1402{ 1470{
1403 BUG_ON(ptr == NULL); 1471 BUG_ON(ptr == NULL);
1404 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1472 ptr[0] = __alloc_percpu(mibsize, align);
1405 if (!ptr[0]) 1473 if (!ptr[0])
1406 goto err0; 1474 goto err0;
1407 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1475 ptr[1] = __alloc_percpu(mibsize, align);
1408 if (!ptr[1]) 1476 if (!ptr[1])
1409 goto err1; 1477 goto err1;
1410 return 0; 1478 return 0;
@@ -1416,7 +1484,7 @@ err0:
1416} 1484}
1417EXPORT_SYMBOL_GPL(snmp_mib_init); 1485EXPORT_SYMBOL_GPL(snmp_mib_init);
1418 1486
1419void snmp_mib_free(void *ptr[2]) 1487void snmp_mib_free(void __percpu *ptr[2])
1420{ 1488{
1421 BUG_ON(ptr == NULL); 1489 BUG_ON(ptr == NULL);
1422 free_percpu(ptr[0]); 1490 free_percpu(ptr[0]);
@@ -1460,56 +1528,63 @@ static const struct net_protocol icmp_protocol = {
1460 1528
1461static __net_init int ipv4_mib_init_net(struct net *net) 1529static __net_init int ipv4_mib_init_net(struct net *net)
1462{ 1530{
1463 if (snmp_mib_init((void **)net->mib.tcp_statistics, 1531 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
1464 sizeof(struct tcp_mib)) < 0) 1532 sizeof(struct tcp_mib),
1533 __alignof__(struct tcp_mib)) < 0)
1465 goto err_tcp_mib; 1534 goto err_tcp_mib;
1466 if (snmp_mib_init((void **)net->mib.ip_statistics, 1535 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
1467 sizeof(struct ipstats_mib)) < 0) 1536 sizeof(struct ipstats_mib),
1537 __alignof__(struct ipstats_mib)) < 0)
1468 goto err_ip_mib; 1538 goto err_ip_mib;
1469 if (snmp_mib_init((void **)net->mib.net_statistics, 1539 if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
1470 sizeof(struct linux_mib)) < 0) 1540 sizeof(struct linux_mib),
1541 __alignof__(struct linux_mib)) < 0)
1471 goto err_net_mib; 1542 goto err_net_mib;
1472 if (snmp_mib_init((void **)net->mib.udp_statistics, 1543 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
1473 sizeof(struct udp_mib)) < 0) 1544 sizeof(struct udp_mib),
1545 __alignof__(struct udp_mib)) < 0)
1474 goto err_udp_mib; 1546 goto err_udp_mib;
1475 if (snmp_mib_init((void **)net->mib.udplite_statistics, 1547 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
1476 sizeof(struct udp_mib)) < 0) 1548 sizeof(struct udp_mib),
1549 __alignof__(struct udp_mib)) < 0)
1477 goto err_udplite_mib; 1550 goto err_udplite_mib;
1478 if (snmp_mib_init((void **)net->mib.icmp_statistics, 1551 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
1479 sizeof(struct icmp_mib)) < 0) 1552 sizeof(struct icmp_mib),
1553 __alignof__(struct icmp_mib)) < 0)
1480 goto err_icmp_mib; 1554 goto err_icmp_mib;
1481 if (snmp_mib_init((void **)net->mib.icmpmsg_statistics, 1555 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
1482 sizeof(struct icmpmsg_mib)) < 0) 1556 sizeof(struct icmpmsg_mib),
1557 __alignof__(struct icmpmsg_mib)) < 0)
1483 goto err_icmpmsg_mib; 1558 goto err_icmpmsg_mib;
1484 1559
1485 tcp_mib_init(net); 1560 tcp_mib_init(net);
1486 return 0; 1561 return 0;
1487 1562
1488err_icmpmsg_mib: 1563err_icmpmsg_mib:
1489 snmp_mib_free((void **)net->mib.icmp_statistics); 1564 snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
1490err_icmp_mib: 1565err_icmp_mib:
1491 snmp_mib_free((void **)net->mib.udplite_statistics); 1566 snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
1492err_udplite_mib: 1567err_udplite_mib:
1493 snmp_mib_free((void **)net->mib.udp_statistics); 1568 snmp_mib_free((void __percpu **)net->mib.udp_statistics);
1494err_udp_mib: 1569err_udp_mib:
1495 snmp_mib_free((void **)net->mib.net_statistics); 1570 snmp_mib_free((void __percpu **)net->mib.net_statistics);
1496err_net_mib: 1571err_net_mib:
1497 snmp_mib_free((void **)net->mib.ip_statistics); 1572 snmp_mib_free((void __percpu **)net->mib.ip_statistics);
1498err_ip_mib: 1573err_ip_mib:
1499 snmp_mib_free((void **)net->mib.tcp_statistics); 1574 snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
1500err_tcp_mib: 1575err_tcp_mib:
1501 return -ENOMEM; 1576 return -ENOMEM;
1502} 1577}
1503 1578
1504static __net_exit void ipv4_mib_exit_net(struct net *net) 1579static __net_exit void ipv4_mib_exit_net(struct net *net)
1505{ 1580{
1506 snmp_mib_free((void **)net->mib.icmpmsg_statistics); 1581 snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
1507 snmp_mib_free((void **)net->mib.icmp_statistics); 1582 snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
1508 snmp_mib_free((void **)net->mib.udplite_statistics); 1583 snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
1509 snmp_mib_free((void **)net->mib.udp_statistics); 1584 snmp_mib_free((void __percpu **)net->mib.udp_statistics);
1510 snmp_mib_free((void **)net->mib.net_statistics); 1585 snmp_mib_free((void __percpu **)net->mib.net_statistics);
1511 snmp_mib_free((void **)net->mib.ip_statistics); 1586 snmp_mib_free((void __percpu **)net->mib.ip_statistics);
1512 snmp_mib_free((void **)net->mib.tcp_statistics); 1587 snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
1513} 1588}
1514 1589
1515static __net_initdata struct pernet_operations ipv4_mib_ops = { 1590static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1546,9 +1621,13 @@ static int __init inet_init(void)
1546 1621
1547 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); 1622 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
1548 1623
1624 sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1625 if (!sysctl_local_reserved_ports)
1626 goto out;
1627
1549 rc = proto_register(&tcp_prot, 1); 1628 rc = proto_register(&tcp_prot, 1);
1550 if (rc) 1629 if (rc)
1551 goto out; 1630 goto out_free_reserved_ports;
1552 1631
1553 rc = proto_register(&udp_prot, 1); 1632 rc = proto_register(&udp_prot, 1);
1554 if (rc) 1633 if (rc)
@@ -1647,6 +1726,8 @@ out_unregister_udp_proto:
1647 proto_unregister(&udp_prot); 1726 proto_unregister(&udp_prot);
1648out_unregister_tcp_proto: 1727out_unregister_tcp_proto:
1649 proto_unregister(&tcp_prot); 1728 proto_unregister(&tcp_prot);
1729out_free_reserved_ports:
1730 kfree(sysctl_local_reserved_ports);
1650 goto out; 1731 goto out;
1651} 1732}
1652 1733