diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 27 | ||||
-rw-r--r-- | include/net/udp.h | 9 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 5 | ||||
-rw-r--r-- | net/ipv4/proc.c | 3 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 31 | ||||
-rw-r--r-- | net/ipv4/udp.c | 57 | ||||
-rw-r--r-- | net/ipv6/udp.c | 32 |
7 files changed, 157 insertions, 7 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 6f7872ba1def..17a6e46fbd43 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -446,6 +446,33 @@ tcp_dma_copybreak - INTEGER | |||
446 | and CONFIG_NET_DMA is enabled. | 446 | and CONFIG_NET_DMA is enabled. |
447 | Default: 4096 | 447 | Default: 4096 |
448 | 448 | ||
449 | UDP variables: | ||
450 | |||
451 | udp_mem - vector of 3 INTEGERs: min, pressure, max | ||
452 | Number of pages allowed for queueing by all UDP sockets. | ||
453 | |||
454 | min: Below this number of pages UDP is not bothered about its | ||
455 | memory appetite. When the amount of memory allocated by UDP exceeds | ||
456 | this number, UDP starts to moderate memory usage. | ||
457 | |||
458 | pressure: This value was introduced to follow the format of tcp_mem. | ||
459 | |||
460 | max: Number of pages allowed for queueing by all UDP sockets. | ||
461 | |||
462 | Default is calculated at boot time from amount of available memory. | ||
463 | |||
464 | udp_rmem_min - INTEGER | ||
465 | Minimal size of the receive buffer used by UDP sockets in moderation. | ||
466 | Each UDP socket is able to use this size for receiving data, even if | ||
467 | the total pages of UDP sockets exceed udp_mem pressure. The unit is bytes. | ||
468 | Default: 4096 | ||
469 | |||
470 | udp_wmem_min - INTEGER | ||
471 | Minimal size of the send buffer used by UDP sockets in moderation. | ||
472 | Each UDP socket is able to use this size for sending data, even if | ||
473 | the total pages of UDP sockets exceed udp_mem pressure. The unit is bytes. | ||
474 | Default: 4096 | ||
475 | |||
449 | CIPSOv4 Variables: | 476 | CIPSOv4 Variables: |
450 | 477 | ||
451 | cipso_cache_enable - BOOLEAN | 478 | cipso_cache_enable - BOOLEAN |
diff --git a/include/net/udp.h b/include/net/udp.h index 98cb09ca3a27..93796beac8ff 100644 --- a/include/net/udp.h +++ b/include/net/udp.h | |||
@@ -65,6 +65,13 @@ extern rwlock_t udp_hash_lock; | |||
65 | 65 | ||
66 | extern struct proto udp_prot; | 66 | extern struct proto udp_prot; |
67 | 67 | ||
68 | extern atomic_t udp_memory_allocated; | ||
69 | |||
70 | /* sysctl variables for udp */ | ||
71 | extern int sysctl_udp_mem[3]; | ||
72 | extern int sysctl_udp_rmem_min; | ||
73 | extern int sysctl_udp_wmem_min; | ||
74 | |||
68 | struct sk_buff; | 75 | struct sk_buff; |
69 | 76 | ||
70 | /* | 77 | /* |
@@ -198,4 +205,6 @@ extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); | |||
198 | extern int udp4_proc_init(void); | 205 | extern int udp4_proc_init(void); |
199 | extern void udp4_proc_exit(void); | 206 | extern void udp4_proc_exit(void); |
200 | #endif | 207 | #endif |
208 | |||
209 | extern void udp_init(void); | ||
201 | #endif /* _UDP_H */ | 210 | #endif /* _UDP_H */ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 03633b7b9b4a..0e12cf646071 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -139,6 +139,8 @@ void inet_sock_destruct(struct sock *sk) | |||
139 | __skb_queue_purge(&sk->sk_receive_queue); | 139 | __skb_queue_purge(&sk->sk_receive_queue); |
140 | __skb_queue_purge(&sk->sk_error_queue); | 140 | __skb_queue_purge(&sk->sk_error_queue); |
141 | 141 | ||
142 | sk_mem_reclaim(sk); | ||
143 | |||
142 | if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { | 144 | if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { |
143 | printk("Attempt to release TCP socket in state %d %p\n", | 145 | printk("Attempt to release TCP socket in state %d %p\n", |
144 | sk->sk_state, sk); | 146 | sk->sk_state, sk); |
@@ -1417,6 +1419,9 @@ static int __init inet_init(void) | |||
1417 | /* Setup TCP slab cache for open requests. */ | 1419 | /* Setup TCP slab cache for open requests. */ |
1418 | tcp_init(); | 1420 | tcp_init(); |
1419 | 1421 | ||
1422 | /* Setup UDP memory threshold */ | ||
1423 | udp_init(); | ||
1424 | |||
1420 | /* Add UDP-Lite (RFC 3828) */ | 1425 | /* Add UDP-Lite (RFC 3828) */ |
1421 | udplite4_register(); | 1426 | udplite4_register(); |
1422 | 1427 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 41734db677be..53bc010beefd 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -56,7 +56,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
56 | sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), | 56 | sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), |
57 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), | 57 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), |
58 | atomic_read(&tcp_memory_allocated)); | 58 | atomic_read(&tcp_memory_allocated)); |
59 | seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); | 59 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse(&udp_prot), |
60 | atomic_read(&udp_memory_allocated)); | ||
60 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); | 61 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); |
61 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); | 62 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); |
62 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 63 | seq_printf(seq, "FRAG: inuse %d memory %d\n", |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 844f26fab06f..a5a9f8e3bb25 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <net/ip.h> | 19 | #include <net/ip.h> |
20 | #include <net/route.h> | 20 | #include <net/route.h> |
21 | #include <net/tcp.h> | 21 | #include <net/tcp.h> |
22 | #include <net/udp.h> | ||
22 | #include <net/cipso_ipv4.h> | 23 | #include <net/cipso_ipv4.h> |
23 | #include <net/inet_frag.h> | 24 | #include <net/inet_frag.h> |
24 | 25 | ||
@@ -812,6 +813,36 @@ static struct ctl_table ipv4_table[] = { | |||
812 | .mode = 0644, | 813 | .mode = 0644, |
813 | .proc_handler = &proc_dointvec, | 814 | .proc_handler = &proc_dointvec, |
814 | }, | 815 | }, |
816 | { | ||
817 | .ctl_name = CTL_UNNUMBERED, | ||
818 | .procname = "udp_mem", | ||
819 | .data = &sysctl_udp_mem, | ||
820 | .maxlen = sizeof(sysctl_udp_mem), | ||
821 | .mode = 0644, | ||
822 | .proc_handler = &proc_dointvec_minmax, | ||
823 | .strategy = &sysctl_intvec, | ||
824 | .extra1 = &zero | ||
825 | }, | ||
826 | { | ||
827 | .ctl_name = CTL_UNNUMBERED, | ||
828 | .procname = "udp_rmem_min", | ||
829 | .data = &sysctl_udp_rmem_min, | ||
830 | .maxlen = sizeof(sysctl_udp_rmem_min), | ||
831 | .mode = 0644, | ||
832 | .proc_handler = &proc_dointvec_minmax, | ||
833 | .strategy = &sysctl_intvec, | ||
834 | .extra1 = &zero | ||
835 | }, | ||
836 | { | ||
837 | .ctl_name = CTL_UNNUMBERED, | ||
838 | .procname = "udp_wmem_min", | ||
839 | .data = &sysctl_udp_wmem_min, | ||
840 | .maxlen = sizeof(sysctl_udp_wmem_min), | ||
841 | .mode = 0644, | ||
842 | .proc_handler = &proc_dointvec_minmax, | ||
843 | .strategy = &sysctl_intvec, | ||
844 | .extra1 = &zero | ||
845 | }, | ||
815 | { .ctl_name = 0 } | 846 | { .ctl_name = 0 } |
816 | }; | 847 | }; |
817 | 848 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1ce6b60b7f93..353284360751 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -82,6 +82,7 @@ | |||
82 | #include <asm/system.h> | 82 | #include <asm/system.h> |
83 | #include <asm/uaccess.h> | 83 | #include <asm/uaccess.h> |
84 | #include <asm/ioctls.h> | 84 | #include <asm/ioctls.h> |
85 | #include <linux/bootmem.h> | ||
85 | #include <linux/types.h> | 86 | #include <linux/types.h> |
86 | #include <linux/fcntl.h> | 87 | #include <linux/fcntl.h> |
87 | #include <linux/module.h> | 88 | #include <linux/module.h> |
@@ -118,6 +119,17 @@ EXPORT_SYMBOL(udp_stats_in6); | |||
118 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; | 119 | struct hlist_head udp_hash[UDP_HTABLE_SIZE]; |
119 | DEFINE_RWLOCK(udp_hash_lock); | 120 | DEFINE_RWLOCK(udp_hash_lock); |
120 | 121 | ||
122 | int sysctl_udp_mem[3] __read_mostly; | ||
123 | int sysctl_udp_rmem_min __read_mostly; | ||
124 | int sysctl_udp_wmem_min __read_mostly; | ||
125 | |||
126 | EXPORT_SYMBOL(sysctl_udp_mem); | ||
127 | EXPORT_SYMBOL(sysctl_udp_rmem_min); | ||
128 | EXPORT_SYMBOL(sysctl_udp_wmem_min); | ||
129 | |||
130 | atomic_t udp_memory_allocated; | ||
131 | EXPORT_SYMBOL(udp_memory_allocated); | ||
132 | |||
121 | static inline int __udp_lib_lport_inuse(__u16 num, | 133 | static inline int __udp_lib_lport_inuse(__u16 num, |
122 | const struct hlist_head udptable[]) | 134 | const struct hlist_head udptable[]) |
123 | { | 135 | { |
@@ -901,13 +913,17 @@ try_again: | |||
901 | err = ulen; | 913 | err = ulen; |
902 | 914 | ||
903 | out_free: | 915 | out_free: |
916 | lock_sock(sk); | ||
904 | skb_free_datagram(sk, skb); | 917 | skb_free_datagram(sk, skb); |
918 | release_sock(sk); | ||
905 | out: | 919 | out: |
906 | return err; | 920 | return err; |
907 | 921 | ||
908 | csum_copy_err: | 922 | csum_copy_err: |
923 | lock_sock(sk); | ||
909 | if (!skb_kill_datagram(sk, skb, flags)) | 924 | if (!skb_kill_datagram(sk, skb, flags)) |
910 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | 925 | UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); |
926 | release_sock(sk); | ||
911 | 927 | ||
912 | if (noblock) | 928 | if (noblock) |
913 | return -EAGAIN; | 929 | return -EAGAIN; |
@@ -1072,7 +1088,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, | |||
1072 | skb1 = skb_clone(skb, GFP_ATOMIC); | 1088 | skb1 = skb_clone(skb, GFP_ATOMIC); |
1073 | 1089 | ||
1074 | if (skb1) { | 1090 | if (skb1) { |
1075 | int ret = udp_queue_rcv_skb(sk, skb1); | 1091 | int ret = 0; |
1092 | |||
1093 | bh_lock_sock_nested(sk); | ||
1094 | if (!sock_owned_by_user(sk)) | ||
1095 | ret = udp_queue_rcv_skb(sk, skb1); | ||
1096 | else | ||
1097 | sk_add_backlog(sk, skb1); | ||
1098 | bh_unlock_sock(sk); | ||
1099 | |||
1076 | if (ret > 0) | 1100 | if (ret > 0) |
1077 | /* we should probably re-process instead | 1101 | /* we should probably re-process instead |
1078 | * of dropping packets here. */ | 1102 | * of dropping packets here. */ |
@@ -1165,7 +1189,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1165 | inet_iif(skb), udptable); | 1189 | inet_iif(skb), udptable); |
1166 | 1190 | ||
1167 | if (sk != NULL) { | 1191 | if (sk != NULL) { |
1168 | int ret = udp_queue_rcv_skb(sk, skb); | 1192 | int ret = 0; |
1193 | bh_lock_sock_nested(sk); | ||
1194 | if (!sock_owned_by_user(sk)) | ||
1195 | ret = udp_queue_rcv_skb(sk, skb); | ||
1196 | else | ||
1197 | sk_add_backlog(sk, skb); | ||
1198 | bh_unlock_sock(sk); | ||
1169 | sock_put(sk); | 1199 | sock_put(sk); |
1170 | 1200 | ||
1171 | /* a return value > 0 means to resubmit the input, but | 1201 | /* a return value > 0 means to resubmit the input, but |
@@ -1460,6 +1490,10 @@ struct proto udp_prot = { | |||
1460 | .hash = udp_lib_hash, | 1490 | .hash = udp_lib_hash, |
1461 | .unhash = udp_lib_unhash, | 1491 | .unhash = udp_lib_unhash, |
1462 | .get_port = udp_v4_get_port, | 1492 | .get_port = udp_v4_get_port, |
1493 | .memory_allocated = &udp_memory_allocated, | ||
1494 | .sysctl_mem = sysctl_udp_mem, | ||
1495 | .sysctl_wmem = &sysctl_udp_wmem_min, | ||
1496 | .sysctl_rmem = &sysctl_udp_rmem_min, | ||
1463 | .obj_size = sizeof(struct udp_sock), | 1497 | .obj_size = sizeof(struct udp_sock), |
1464 | #ifdef CONFIG_COMPAT | 1498 | #ifdef CONFIG_COMPAT |
1465 | .compat_setsockopt = compat_udp_setsockopt, | 1499 | .compat_setsockopt = compat_udp_setsockopt, |
@@ -1655,6 +1689,25 @@ void udp4_proc_exit(void) | |||
1655 | } | 1689 | } |
1656 | #endif /* CONFIG_PROC_FS */ | 1690 | #endif /* CONFIG_PROC_FS */ |
1657 | 1691 | ||
1692 | void __init udp_init(void) | ||
1693 | { | ||
1694 | unsigned long limit; | ||
1695 | |||
1696 | /* Set the pressure threshold using the same strategy as TCP. It is a | ||
1697 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | ||
1698 | * toward zero as the amount of memory shrinks, with a floor of 128 pages. | ||
1699 | */ | ||
1700 | limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | ||
1701 | limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | ||
1702 | limit = max(limit, 128UL); | ||
1703 | sysctl_udp_mem[0] = limit / 4 * 3; | ||
1704 | sysctl_udp_mem[1] = limit; | ||
1705 | sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; | ||
1706 | |||
1707 | sysctl_udp_rmem_min = SK_MEM_QUANTUM; | ||
1708 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; | ||
1709 | } | ||
1710 | |||
1658 | EXPORT_SYMBOL(udp_disconnect); | 1711 | EXPORT_SYMBOL(udp_disconnect); |
1659 | EXPORT_SYMBOL(udp_hash); | 1712 | EXPORT_SYMBOL(udp_hash); |
1660 | EXPORT_SYMBOL(udp_hash_lock); | 1713 | EXPORT_SYMBOL(udp_hash_lock); |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c9a97b405511..bf58acab2064 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -204,13 +204,17 @@ try_again: | |||
204 | err = ulen; | 204 | err = ulen; |
205 | 205 | ||
206 | out_free: | 206 | out_free: |
207 | lock_sock(sk); | ||
207 | skb_free_datagram(sk, skb); | 208 | skb_free_datagram(sk, skb); |
209 | release_sock(sk); | ||
208 | out: | 210 | out: |
209 | return err; | 211 | return err; |
210 | 212 | ||
211 | csum_copy_err: | 213 | csum_copy_err: |
214 | lock_sock(sk); | ||
212 | if (!skb_kill_datagram(sk, skb, flags)) | 215 | if (!skb_kill_datagram(sk, skb, flags)) |
213 | UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); | 216 | UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); |
217 | release_sock(sk); | ||
214 | 218 | ||
215 | if (flags & MSG_DONTWAIT) | 219 | if (flags & MSG_DONTWAIT) |
216 | return -EAGAIN; | 220 | return -EAGAIN; |
@@ -366,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, | |||
366 | while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, | 370 | while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, |
367 | uh->source, saddr, dif))) { | 371 | uh->source, saddr, dif))) { |
368 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); | 372 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); |
369 | if (buff) | 373 | if (buff) { |
370 | udpv6_queue_rcv_skb(sk2, buff); | 374 | bh_lock_sock_nested(sk2); |
375 | if (!sock_owned_by_user(sk2)) | ||
376 | udpv6_queue_rcv_skb(sk2, buff); | ||
377 | else | ||
378 | sk_add_backlog(sk2, buff); | ||
379 | bh_unlock_sock(sk2); | ||
380 | } | ||
371 | } | 381 | } |
372 | udpv6_queue_rcv_skb(sk, skb); | 382 | bh_lock_sock_nested(sk); |
383 | if (!sock_owned_by_user(sk)) | ||
384 | udpv6_queue_rcv_skb(sk, skb); | ||
385 | else | ||
386 | sk_add_backlog(sk, skb); | ||
387 | bh_unlock_sock(sk); | ||
373 | out: | 388 | out: |
374 | read_unlock(&udp_hash_lock); | 389 | read_unlock(&udp_hash_lock); |
375 | return 0; | 390 | return 0; |
@@ -482,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
482 | 497 | ||
483 | /* deliver */ | 498 | /* deliver */ |
484 | 499 | ||
485 | udpv6_queue_rcv_skb(sk, skb); | 500 | bh_lock_sock_nested(sk); |
501 | if (!sock_owned_by_user(sk)) | ||
502 | udpv6_queue_rcv_skb(sk, skb); | ||
503 | else | ||
504 | sk_add_backlog(sk, skb); | ||
505 | bh_unlock_sock(sk); | ||
486 | sock_put(sk); | 506 | sock_put(sk); |
487 | return 0; | 507 | return 0; |
488 | 508 | ||
@@ -994,6 +1014,10 @@ struct proto udpv6_prot = { | |||
994 | .hash = udp_lib_hash, | 1014 | .hash = udp_lib_hash, |
995 | .unhash = udp_lib_unhash, | 1015 | .unhash = udp_lib_unhash, |
996 | .get_port = udp_v6_get_port, | 1016 | .get_port = udp_v6_get_port, |
1017 | .memory_allocated = &udp_memory_allocated, | ||
1018 | .sysctl_mem = sysctl_udp_mem, | ||
1019 | .sysctl_wmem = &sysctl_udp_wmem_min, | ||
1020 | .sysctl_rmem = &sysctl_udp_rmem_min, | ||
997 | .obj_size = sizeof(struct udp6_sock), | 1021 | .obj_size = sizeof(struct udp6_sock), |
998 | #ifdef CONFIG_COMPAT | 1022 | #ifdef CONFIG_COMPAT |
999 | .compat_setsockopt = compat_udpv6_setsockopt, | 1023 | .compat_setsockopt = compat_udpv6_setsockopt, |