Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--   net/ipv4/udp.c | 106
1 file changed, 86 insertions(+), 20 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 03c400ca14c5..2fb8d731026b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -82,6 +82,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
+#include <linux/bootmem.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/module.h>
@@ -110,10 +111,25 @@
  */
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
+EXPORT_SYMBOL(udp_statistics);
+
+DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
+EXPORT_SYMBOL(udp_stats_in6);
 
 struct hlist_head udp_hash[UDP_HTABLE_SIZE];
 DEFINE_RWLOCK(udp_hash_lock);
 
+int sysctl_udp_mem[3] __read_mostly;
+int sysctl_udp_rmem_min __read_mostly;
+int sysctl_udp_wmem_min __read_mostly;
+
+EXPORT_SYMBOL(sysctl_udp_mem);
+EXPORT_SYMBOL(sysctl_udp_rmem_min);
+EXPORT_SYMBOL(sysctl_udp_wmem_min);
+
+atomic_t udp_memory_allocated;
+EXPORT_SYMBOL(udp_memory_allocated);
+
 static inline int __udp_lib_lport_inuse(__u16 num,
                                         const struct hlist_head udptable[])
 {
@@ -214,7 +230,7 @@ gotit:
 	if (sk_unhashed(sk)) {
 		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
-		sock_prot_inc_use(sk->sk_prot);
+		sock_prot_inuse_add(sk->sk_prot, 1);
 	}
 	error = 0;
 fail:
@@ -402,7 +418,7 @@ out:
 
 void udp_err(struct sk_buff *skb, u32 info)
 {
-	return __udp4_lib_err(skb, info, udp_hash);
+	__udp4_lib_err(skb, info, udp_hash);
 }
 
 /*
@@ -471,6 +487,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	struct sk_buff *skb;
 	struct udphdr *uh;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
 	__wsum csum = 0;
 
 	/* Grab the skbuff where UDP header space exists. */
@@ -486,7 +503,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	uh->len = htons(up->len);
 	uh->check = 0;
 
-	if (up->pcflag)					/* UDP-Lite */
+	if (is_udplite)					/* UDP-Lite */
 		csum = udplite_csum_outgoing(sk, skb);
 
 	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {	/* UDP csum disabled */
@@ -514,7 +531,7 @@ out:
 	up->len = 0;
 	up->pending = 0;
 	if (!err)
-		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
+		UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
 	return err;
 }
 
@@ -531,7 +548,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	__be32 daddr, faddr, saddr;
 	__be16 dport;
 	u8 tos;
-	int err, is_udplite = up->pcflag;
+	int err, is_udplite = IS_UDPLITE(sk);
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 
@@ -621,7 +638,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		connected = 0;
 	}
 
-	if (MULTICAST(daddr)) {
+	if (ipv4_is_multicast(daddr)) {
 		if (!ipc.oif)
 			ipc.oif = inet->mc_index;
 		if (!saddr)
@@ -643,7 +660,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 					    { .sport = inet->sport,
 					      .dport = dport } } };
 		security_sk_classify_flow(sk, &fl);
-		err = ip_route_output_flow(&rt, &fl, sk, 1);
+		err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
 		if (err) {
 			if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -825,6 +842,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
 	unsigned int ulen, copied;
+	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -838,7 +856,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		return ip_recv_error(sk, msg, len);
 
 try_again:
-	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+				  &peeked, &err);
 	if (!skb)
 		goto out;
 
@@ -873,6 +892,9 @@ try_again:
 	if (err)
 		goto out_free;
 
+	if (!peeked)
+		UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+
 	sock_recv_timestamp(msg, sk, skb);
 
 	/* Copy the address. */
@@ -891,14 +913,17 @@ try_again:
 	err = ulen;
 
 out_free:
+	lock_sock(sk);
 	skb_free_datagram(sk, skb);
+	release_sock(sk);
 out:
 	return err;
 
 csum_copy_err:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
-
-	skb_kill_datagram(sk, skb, flags);
+	lock_sock(sk);
+	if (!skb_kill_datagram(sk, skb, flags))
+		UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+	release_sock(sk);
 
 	if (noblock)
 		return -EAGAIN;
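[Editor's note -- illustrative addition, not part of the patch] The peeked flag returned by __skb_recv_datagram is what keeps MSG_PEEK reads from inflating the InDatagrams counter: a peek leaves the datagram queued, so only the eventual non-peek read is counted. A minimal user-space sketch of the double delivery this guards against (helper name invented for illustration):

/* Illustration only: the same datagram reaches user space twice, once via
 * MSG_PEEK and once for real; with the hunk above it is counted only once. */
#include <sys/types.h>
#include <sys/socket.h>

static ssize_t peek_then_read(int fd, char *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, MSG_PEEK);	/* datagram stays queued */
	if (n < 0)
		return n;
	return recv(fd, buf, len, 0);			/* datagram is consumed */
}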
@@ -940,6 +965,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
 	struct udp_sock *up = udp_sk(sk);
 	int rc;
+	int is_udplite = IS_UDPLITE(sk);
 
 	/*
 	 *	Charge it to the socket, dropping if the queue is full.
@@ -967,7 +993,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 
 			ret = (*up->encap_rcv)(sk, skb);
 			if (ret <= 0) {
-				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
+				UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+						 is_udplite);
 				return -ret;
 			}
 		}
@@ -978,7 +1005,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	/*
 	 *	UDP-Lite specific tests, ignored on UDP sockets
 	 */
-	if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+	if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
 
 		/*
 		 * MIB statistics other than incrementing the error count are
@@ -1019,15 +1046,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
-			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag);
+			UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
 		goto drop;
 	}
 
-	UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag);
 	return 0;
 
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag);
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
 	kfree_skb(skb);
 	return -1;
 }
@@ -1062,7 +1088,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 			skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
-				int ret = udp_queue_rcv_skb(sk, skb1);
+				int ret = 0;
+
+				bh_lock_sock_nested(sk);
+				if (!sock_owned_by_user(sk))
+					ret = udp_queue_rcv_skb(sk, skb1);
+				else
+					sk_add_backlog(sk, skb1);
+				bh_unlock_sock(sk);
+
 				if (ret > 0)
 					/* we should probably re-process instead
 					 * of dropping packets here. */
@@ -1155,7 +1189,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 			       inet_iif(skb), udptable);
 
 	if (sk != NULL) {
-		int ret = udp_queue_rcv_skb(sk, skb);
+		int ret = 0;
+		bh_lock_sock_nested(sk);
+		if (!sock_owned_by_user(sk))
+			ret = udp_queue_rcv_skb(sk, skb);
+		else
+			sk_add_backlog(sk, skb);
+		bh_unlock_sock(sk);
 		sock_put(sk);
 
 		/* a return value > 0 means to resubmit the input, but
@@ -1236,6 +1276,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	struct udp_sock *up = udp_sk(sk);
 	int val;
 	int err = 0;
+	int is_udplite = IS_UDPLITE(sk);
 
 	if (optlen<sizeof(int))
 		return -EINVAL;
@@ -1277,7 +1318,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	/* The sender sets actual checksum coverage length via this option.
 	 * The case coverage > packet length is handled by send module. */
 	case UDPLITE_SEND_CSCOV:
-		if (!up->pcflag)         /* Disable the option on UDP sockets */
+		if (!is_udplite)         /* Disable the option on UDP sockets */
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
 			val = 8;
@@ -1289,7 +1330,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	 * sense, this should be set to at least 8 (as done below). If zero is
 	 * used, this again means full checksum coverage. */
 	case UDPLITE_RECV_CSCOV:
-		if (!up->pcflag)         /* Disable the option on UDP sockets */
+		if (!is_udplite)         /* Disable the option on UDP sockets */
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Avoid silly minimal values. */
 			val = 8;
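[Editor's note -- illustrative addition, not part of the patch] The two cases above are the UDP-Lite checksum-coverage knobs that the is_udplite guard now protects. For orientation, a minimal user-space sketch of driving them is shown below; the wrapper name is invented, and the option constants are spelled out in case the libc headers in use do not provide them:

/* Illustrative sketch only -- not kernel code. */
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE    136	/* per IANA / linux/in.h */
#endif
#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV 10	/* matches linux/udp.h */
#endif
#ifndef UDPLITE_RECV_CSCOV
#define UDPLITE_RECV_CSCOV 11	/* matches linux/udp.h */
#endif

/* Hypothetical helper: request partial checksum coverage on a UDP-Lite
 * socket.  Nonzero values below 8 are raised to 8 by the kernel, as the
 * hunks above show. */
static int udplite_set_cscov(int fd, int send_cov, int recv_cov)
{
	if (setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV,
		       &send_cov, sizeof(send_cov)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV,
			  &recv_cov, sizeof(recv_cov));
}

A socket created with socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE) is the intended target; on a plain UDP socket both options are rejected with ENOPROTOOPT, which is exactly what the !is_udplite checks implement.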
@@ -1449,6 +1490,10 @@ struct proto udp_prot = {
 	.hash		   = udp_lib_hash,
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
+	.memory_allocated  = &udp_memory_allocated,
+	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem	   = &sysctl_udp_wmem_min,
+	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp_sock),
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
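[Editor's note -- illustrative addition, not part of the patch] The four new udp_prot fields plug UDP into the generic per-protocol memory accounting that TCP already uses: memory_allocated is the shared counter of pages charged to UDP sockets, and sysctl_mem carries the min/pressure/max thresholds (in pages) that udp_init() fills in further down. A deliberately simplified sketch of how such a threshold triple is typically consulted -- names and shape invented for clarity, not the kernel's actual accounting code:

/* Simplified illustration only. */
enum mem_state { MEM_OK, MEM_PRESSURE, MEM_OVER_LIMIT };

static enum mem_state proto_mem_state(long allocated_pages, const int limits[3])
{
	if (allocated_pages >= limits[2])	/* max: further allocation refused */
		return MEM_OVER_LIMIT;
	if (allocated_pages >= limits[1])	/* pressure threshold reached */
		return MEM_PRESSURE;
	return MEM_OK;				/* below the pressure threshold */
}

sysctl_rmem and sysctl_wmem point at the per-socket minimums (sysctl_udp_rmem_min and sysctl_udp_wmem_min) that each socket keeps even when the protocol as a whole is under memory pressure.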
@@ -1505,6 +1550,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(udp_hash_lock)
 {
 	read_lock(&udp_hash_lock);
 	return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
@@ -1524,6 +1570,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void udp_seq_stop(struct seq_file *seq, void *v)
+	__releases(udp_hash_lock)
 {
 	read_unlock(&udp_hash_lock);
 }
@@ -1644,6 +1691,25 @@ void udp4_proc_exit(void)
 }
 #endif /* CONFIG_PROC_FS */
 
+void __init udp_init(void)
+{
+	unsigned long limit;
+
+	/* Set the pressure threshold up by the same strategy of TCP. It is a
+	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
+	 * toward zero with the amount of memory, with a floor of 128 pages.
+	 */
+	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	limit = max(limit, 128UL);
+	sysctl_udp_mem[0] = limit / 4 * 3;
+	sysctl_udp_mem[1] = limit;
+	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
+
+	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+}
+
 EXPORT_SYMBOL(udp_disconnect);
 EXPORT_SYMBOL(udp_hash);
 EXPORT_SYMBOL(udp_hash_lock);
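[Editor's note -- illustrative addition, not part of the patch] To make the udp_init() arithmetic concrete: a minimal stand-alone sketch, assuming 4 KiB pages and a hypothetical machine with 512 MB of RAM (nr_all_pages == 131072), reproduces the defaults the function would compute -- udp_mem = 49152 65536 98304, expressed in pages:

/* Stand-alone illustration only; mirrors the computation above outside the kernel. */
#include <stdio.h>

#define PAGE_SHIFT 12UL				/* assumption: 4 KiB pages */

int main(void)
{
	unsigned long nr_all_pages = 131072;	/* assumption: 512 MB / 4 KiB */
	unsigned long cap = 1UL << (28 - PAGE_SHIFT);	/* 65536 pages == 256 MB */
	unsigned long limit, udp_mem[3];

	limit = (nr_all_pages < cap ? nr_all_pages : cap) >> (20 - PAGE_SHIFT);
	limit = (limit * (nr_all_pages >> (20 - PAGE_SHIFT))) >> (PAGE_SHIFT - 11);
	if (limit < 128UL)			/* floor of 128 pages */
		limit = 128UL;

	udp_mem[0] = limit / 4 * 3;		/* 49152: min      */
	udp_mem[1] = limit;			/* 65536: pressure */
	udp_mem[2] = udp_mem[0] * 2;		/* 98304: max      */

	printf("udp_mem = %lu %lu %lu (pages)\n",
	       udp_mem[0], udp_mem[1], udp_mem[2]);
	return 0;
}

The receive and send minimums are simply set to SK_MEM_QUANTUM, the page-sized unit in which the new accounting charges socket buffers.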