diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-12-11 23:23:58 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-12-11 23:23:58 -0500 |
commit | 913f2d792ff584bf9ebf5968472b3e4ffe0fd9c8 (patch) | |
tree | 37c499384a1e516a9b92ddb841fada84e9be0167 | |
parent | fe3f2053fd42148b3ebb7b90de5f20eaf3393860 (diff) | |
parent | 73d4f84fd001b0be67fea46e84b75e6a7a5da08e (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
-rw-r--r-- | drivers/net/tg3.c | 15 | ||||
-rw-r--r-- | include/linux/sysctl.h | 3 | ||||
-rw-r--r-- | include/net/dn.h | 4 | ||||
-rw-r--r-- | net/core/dev.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 2 | ||||
-rw-r--r-- | net/decnet/af_decnet.c | 25 | ||||
-rw-r--r-- | net/decnet/sysctl_net_decnet.c | 33 | ||||
-rw-r--r-- | net/ipv4/netfilter/Kconfig | 8 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 20 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_netlink.c | 12 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 233 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 16 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 2 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 12 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 4 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 3 | ||||
-rw-r--r-- | net/netfilter/nfnetlink.c | 5 | ||||
-rw-r--r-- | net/packet/af_packet.c | 115 |
19 files changed, 299 insertions, 219 deletions
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 1828a6bf8458..47bd4a394420 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c | |||
@@ -68,8 +68,8 @@ | |||
68 | 68 | ||
69 | #define DRV_MODULE_NAME "tg3" | 69 | #define DRV_MODULE_NAME "tg3" |
70 | #define PFX DRV_MODULE_NAME ": " | 70 | #define PFX DRV_MODULE_NAME ": " |
71 | #define DRV_MODULE_VERSION "3.43" | 71 | #define DRV_MODULE_VERSION "3.44" |
72 | #define DRV_MODULE_RELDATE "Oct 24, 2005" | 72 | #define DRV_MODULE_RELDATE "Dec 6, 2005" |
73 | 73 | ||
74 | #define TG3_DEF_MAC_MODE 0 | 74 | #define TG3_DEF_MAC_MODE 0 |
75 | #define TG3_DEF_RX_MODE 0 | 75 | #define TG3_DEF_RX_MODE 0 |
@@ -3565,12 +3565,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) | |||
3565 | if (!spin_trylock(&tp->tx_lock)) | 3565 | if (!spin_trylock(&tp->tx_lock)) |
3566 | return NETDEV_TX_LOCKED; | 3566 | return NETDEV_TX_LOCKED; |
3567 | 3567 | ||
3568 | /* This is a hard error, log it. */ | ||
3569 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { | 3568 | if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { |
3570 | netif_stop_queue(dev); | 3569 | if (!netif_queue_stopped(dev)) { |
3570 | netif_stop_queue(dev); | ||
3571 | |||
3572 | /* This is a hard error, log it. */ | ||
3573 | printk(KERN_ERR PFX "%s: BUG! Tx Ring full when " | ||
3574 | "queue awake!\n", dev->name); | ||
3575 | } | ||
3571 | spin_unlock(&tp->tx_lock); | 3576 | spin_unlock(&tp->tx_lock); |
3572 | printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", | ||
3573 | dev->name); | ||
3574 | return NETDEV_TX_BUSY; | 3577 | return NETDEV_TX_BUSY; |
3575 | } | 3578 | } |
3576 | 3579 | ||
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6bc03c911a83..4be34ef8c2f7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -670,6 +670,9 @@ enum { | |||
670 | NET_DECNET_DST_GC_INTERVAL = 9, | 670 | NET_DECNET_DST_GC_INTERVAL = 9, |
671 | NET_DECNET_CONF = 10, | 671 | NET_DECNET_CONF = 10, |
672 | NET_DECNET_NO_FC_MAX_CWND = 11, | 672 | NET_DECNET_NO_FC_MAX_CWND = 11, |
673 | NET_DECNET_MEM = 12, | ||
674 | NET_DECNET_RMEM = 13, | ||
675 | NET_DECNET_WMEM = 14, | ||
673 | NET_DECNET_DEBUG_LEVEL = 255 | 676 | NET_DECNET_DEBUG_LEVEL = 255 |
674 | }; | 677 | }; |
675 | 678 | ||
diff --git a/include/net/dn.h b/include/net/dn.h index c1dbbd222793..a4b6168e1e25 100644 --- a/include/net/dn.h +++ b/include/net/dn.h | |||
@@ -234,4 +234,8 @@ extern int decnet_di_count; | |||
234 | extern int decnet_dr_count; | 234 | extern int decnet_dr_count; |
235 | extern int decnet_no_fc_max_cwnd; | 235 | extern int decnet_no_fc_max_cwnd; |
236 | 236 | ||
237 | extern int sysctl_decnet_mem[3]; | ||
238 | extern int sysctl_decnet_wmem[3]; | ||
239 | extern int sysctl_decnet_rmem[3]; | ||
240 | |||
237 | #endif /* _NET_DN_H */ | 241 | #endif /* _NET_DN_H */ |
diff --git a/net/core/dev.c b/net/core/dev.c index 0b48e294aafe..a5efc9ae010b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -1113,7 +1113,8 @@ out: | |||
1113 | void netdev_rx_csum_fault(struct net_device *dev) | 1113 | void netdev_rx_csum_fault(struct net_device *dev) |
1114 | { | 1114 | { |
1115 | if (net_ratelimit()) { | 1115 | if (net_ratelimit()) { |
1116 | printk(KERN_ERR "%s: hw csum failure.\n", dev->name); | 1116 | printk(KERN_ERR "%s: hw csum failure.\n", |
1117 | dev ? dev->name : "<unknown>"); | ||
1117 | dump_stack(); | 1118 | dump_stack(); |
1118 | } | 1119 | } |
1119 | } | 1120 | } |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b7d13a4fff48..83fee37de38e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -1725,7 +1725,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, | |||
1725 | * of the skb if any page alloc fails user this procedure returns -ENOMEM | 1725 | * of the skb if any page alloc fails user this procedure returns -ENOMEM |
1726 | */ | 1726 | */ |
1727 | int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, | 1727 | int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, |
1728 | int getfrag(void *from, char *to, int offset, | 1728 | int (*getfrag)(void *from, char *to, int offset, |
1729 | int len, int odd, struct sk_buff *skb), | 1729 | int len, int odd, struct sk_buff *skb), |
1730 | void *from, int length) | 1730 | void *from, int length) |
1731 | { | 1731 | { |
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index f89e55f814d9..d402e9020c68 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c | |||
@@ -153,6 +153,7 @@ static struct proto_ops dn_proto_ops; | |||
153 | static DEFINE_RWLOCK(dn_hash_lock); | 153 | static DEFINE_RWLOCK(dn_hash_lock); |
154 | static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; | 154 | static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; |
155 | static struct hlist_head dn_wild_sk; | 155 | static struct hlist_head dn_wild_sk; |
156 | static atomic_t decnet_memory_allocated; | ||
156 | 157 | ||
157 | static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags); | 158 | static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags); |
158 | static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); | 159 | static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); |
@@ -446,10 +447,26 @@ static void dn_destruct(struct sock *sk) | |||
446 | dst_release(xchg(&sk->sk_dst_cache, NULL)); | 447 | dst_release(xchg(&sk->sk_dst_cache, NULL)); |
447 | } | 448 | } |
448 | 449 | ||
450 | static int dn_memory_pressure; | ||
451 | |||
452 | static void dn_enter_memory_pressure(void) | ||
453 | { | ||
454 | if (!dn_memory_pressure) { | ||
455 | dn_memory_pressure = 1; | ||
456 | } | ||
457 | } | ||
458 | |||
449 | static struct proto dn_proto = { | 459 | static struct proto dn_proto = { |
450 | .name = "DECNET", | 460 | .name = "NSP", |
451 | .owner = THIS_MODULE, | 461 | .owner = THIS_MODULE, |
452 | .obj_size = sizeof(struct dn_sock), | 462 | .enter_memory_pressure = dn_enter_memory_pressure, |
463 | .memory_pressure = &dn_memory_pressure, | ||
464 | .memory_allocated = &decnet_memory_allocated, | ||
465 | .sysctl_mem = sysctl_decnet_mem, | ||
466 | .sysctl_wmem = sysctl_decnet_wmem, | ||
467 | .sysctl_rmem = sysctl_decnet_rmem, | ||
468 | .max_header = DN_MAX_NSP_DATA_HEADER + 64, | ||
469 | .obj_size = sizeof(struct dn_sock), | ||
453 | }; | 470 | }; |
454 | 471 | ||
455 | static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) | 472 | static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) |
@@ -470,6 +487,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) | |||
470 | sk->sk_family = PF_DECnet; | 487 | sk->sk_family = PF_DECnet; |
471 | sk->sk_protocol = 0; | 488 | sk->sk_protocol = 0; |
472 | sk->sk_allocation = gfp; | 489 | sk->sk_allocation = gfp; |
490 | sk->sk_sndbuf = sysctl_decnet_wmem[1]; | ||
491 | sk->sk_rcvbuf = sysctl_decnet_rmem[1]; | ||
473 | 492 | ||
474 | /* Initialization of DECnet Session Control Port */ | 493 | /* Initialization of DECnet Session Control Port */ |
475 | scp = DN_SK(sk); | 494 | scp = DN_SK(sk); |
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 02bca49cb508..0e9d2c571165 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * | 10 | * |
11 | * Changes: | 11 | * Changes: |
12 | * Steve Whitehouse - C99 changes and default device handling | 12 | * Steve Whitehouse - C99 changes and default device handling |
13 | * Steve Whitehouse - Memory buffer settings, like the tcp ones | ||
13 | * | 14 | * |
14 | */ | 15 | */ |
15 | #include <linux/config.h> | 16 | #include <linux/config.h> |
@@ -37,6 +38,11 @@ int decnet_dr_count = 3; | |||
37 | int decnet_log_martians = 1; | 38 | int decnet_log_martians = 1; |
38 | int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; | 39 | int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; |
39 | 40 | ||
41 | /* Reasonable defaults, I hope, based on tcp's defaults */ | ||
42 | int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; | ||
43 | int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; | ||
44 | int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; | ||
45 | |||
40 | #ifdef CONFIG_SYSCTL | 46 | #ifdef CONFIG_SYSCTL |
41 | extern int decnet_dst_gc_interval; | 47 | extern int decnet_dst_gc_interval; |
42 | static int min_decnet_time_wait[] = { 5 }; | 48 | static int min_decnet_time_wait[] = { 5 }; |
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = { | |||
428 | .extra1 = &min_decnet_no_fc_max_cwnd, | 434 | .extra1 = &min_decnet_no_fc_max_cwnd, |
429 | .extra2 = &max_decnet_no_fc_max_cwnd | 435 | .extra2 = &max_decnet_no_fc_max_cwnd |
430 | }, | 436 | }, |
437 | { | ||
438 | .ctl_name = NET_DECNET_MEM, | ||
439 | .procname = "decnet_mem", | ||
440 | .data = &sysctl_decnet_mem, | ||
441 | .maxlen = sizeof(sysctl_decnet_mem), | ||
442 | .mode = 0644, | ||
443 | .proc_handler = &proc_dointvec, | ||
444 | .strategy = &sysctl_intvec, | ||
445 | }, | ||
446 | { | ||
447 | .ctl_name = NET_DECNET_RMEM, | ||
448 | .procname = "decnet_rmem", | ||
449 | .data = &sysctl_decnet_rmem, | ||
450 | .maxlen = sizeof(sysctl_decnet_rmem), | ||
451 | .mode = 0644, | ||
452 | .proc_handler = &proc_dointvec, | ||
453 | .strategy = &sysctl_intvec, | ||
454 | }, | ||
455 | { | ||
456 | .ctl_name = NET_DECNET_WMEM, | ||
457 | .procname = "decnet_wmem", | ||
458 | .data = &sysctl_decnet_wmem, | ||
459 | .maxlen = sizeof(sysctl_decnet_wmem), | ||
460 | .mode = 0644, | ||
461 | .proc_handler = &proc_dointvec, | ||
462 | .strategy = &sysctl_intvec, | ||
463 | }, | ||
431 | { | 464 | { |
432 | .ctl_name = NET_DECNET_DEBUG_LEVEL, | 465 | .ctl_name = NET_DECNET_DEBUG_LEVEL, |
433 | .procname = "debug", | 466 | .procname = "debug", |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 0bc00528d888..88a60650e6b8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK | |||
56 | instead of the individual packets. | 56 | instead of the individual packets. |
57 | 57 | ||
58 | config IP_NF_CONNTRACK_EVENTS | 58 | config IP_NF_CONNTRACK_EVENTS |
59 | bool "Connection tracking events" | 59 | bool "Connection tracking events (EXPERIMENTAL)" |
60 | depends on IP_NF_CONNTRACK | 60 | depends on EXPERIMENTAL && IP_NF_CONNTRACK |
61 | help | 61 | help |
62 | If this option is enabled, the connection tracking code will | 62 | If this option is enabled, the connection tracking code will |
63 | provide a notifier chain that can be used by other kernel code | 63 | provide a notifier chain that can be used by other kernel code |
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS | |||
66 | IF unsure, say `N'. | 66 | IF unsure, say `N'. |
67 | 67 | ||
68 | config IP_NF_CONNTRACK_NETLINK | 68 | config IP_NF_CONNTRACK_NETLINK |
69 | tristate 'Connection tracking netlink interface' | 69 | tristate 'Connection tracking netlink interface (EXPERIMENTAL)' |
70 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | 70 | depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK |
71 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m | 71 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m |
72 | help | 72 | help |
73 | This option enables support for a netlink-based userspace interface | 73 | This option enables support for a netlink-based userspace interface |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 7a4ecddd597b..84c66dbfedaf 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data) | |||
1345 | return 1; | 1345 | return 1; |
1346 | } | 1346 | } |
1347 | 1347 | ||
1348 | void ip_conntrack_flush(void) | ||
1349 | { | ||
1350 | ip_ct_iterate_cleanup(kill_all, NULL); | ||
1351 | } | ||
1352 | |||
1348 | static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) | 1353 | static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) |
1349 | { | 1354 | { |
1350 | if (vmalloced) | 1355 | if (vmalloced) |
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) | |||
1354 | get_order(sizeof(struct list_head) * size)); | 1359 | get_order(sizeof(struct list_head) * size)); |
1355 | } | 1360 | } |
1356 | 1361 | ||
1357 | void ip_conntrack_flush(void) | 1362 | /* Mishearing the voices in his head, our hero wonders how he's |
1363 | supposed to kill the mall. */ | ||
1364 | void ip_conntrack_cleanup(void) | ||
1358 | { | 1365 | { |
1366 | ip_ct_attach = NULL; | ||
1367 | |||
1359 | /* This makes sure all current packets have passed through | 1368 | /* This makes sure all current packets have passed through |
1360 | netfilter framework. Roll on, two-stage module | 1369 | netfilter framework. Roll on, two-stage module |
1361 | delete... */ | 1370 | delete... */ |
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void) | |||
1363 | 1372 | ||
1364 | ip_ct_event_cache_flush(); | 1373 | ip_ct_event_cache_flush(); |
1365 | i_see_dead_people: | 1374 | i_see_dead_people: |
1366 | ip_ct_iterate_cleanup(kill_all, NULL); | 1375 | ip_conntrack_flush(); |
1367 | if (atomic_read(&ip_conntrack_count) != 0) { | 1376 | if (atomic_read(&ip_conntrack_count) != 0) { |
1368 | schedule(); | 1377 | schedule(); |
1369 | goto i_see_dead_people; | 1378 | goto i_see_dead_people; |
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void) | |||
1371 | /* wait until all references to ip_conntrack_untracked are dropped */ | 1380 | /* wait until all references to ip_conntrack_untracked are dropped */ |
1372 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) | 1381 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) |
1373 | schedule(); | 1382 | schedule(); |
1374 | } | ||
1375 | 1383 | ||
1376 | /* Mishearing the voices in his head, our hero wonders how he's | ||
1377 | supposed to kill the mall. */ | ||
1378 | void ip_conntrack_cleanup(void) | ||
1379 | { | ||
1380 | ip_ct_attach = NULL; | ||
1381 | ip_conntrack_flush(); | ||
1382 | kmem_cache_destroy(ip_conntrack_cachep); | 1384 | kmem_cache_destroy(ip_conntrack_cachep); |
1383 | kmem_cache_destroy(ip_conntrack_expect_cachep); | 1385 | kmem_cache_destroy(ip_conntrack_expect_cachep); |
1384 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, | 1386 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, |
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 3fce91bcc0ba..91fe8f2e38ff 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
@@ -503,7 +503,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | |||
503 | } | 503 | } |
504 | 504 | ||
505 | static const size_t cta_min_proto[CTA_PROTO_MAX] = { | 505 | static const size_t cta_min_proto[CTA_PROTO_MAX] = { |
506 | [CTA_PROTO_NUM-1] = sizeof(u_int16_t), | 506 | [CTA_PROTO_NUM-1] = sizeof(u_int8_t), |
507 | [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), | 507 | [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), |
508 | [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), | 508 | [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), |
509 | [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), | 509 | [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), |
@@ -528,7 +528,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, | |||
528 | 528 | ||
529 | if (!tb[CTA_PROTO_NUM-1]) | 529 | if (!tb[CTA_PROTO_NUM-1]) |
530 | return -EINVAL; | 530 | return -EINVAL; |
531 | tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); | 531 | tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); |
532 | 532 | ||
533 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | 533 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); |
534 | 534 | ||
@@ -728,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
728 | return -ENOENT; | 728 | return -ENOENT; |
729 | } | 729 | } |
730 | } | 730 | } |
731 | if (del_timer(&ct->timeout)) { | 731 | if (del_timer(&ct->timeout)) |
732 | ip_conntrack_put(ct); | ||
733 | ct->timeout.function((unsigned long)ct); | 732 | ct->timeout.function((unsigned long)ct); |
734 | return 0; | 733 | |
735 | } | ||
736 | ip_conntrack_put(ct); | 734 | ip_conntrack_put(ct); |
737 | DEBUGP("leaving\n"); | 735 | DEBUGP("leaving\n"); |
738 | 736 | ||
@@ -877,7 +875,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) | |||
877 | DEBUGP("NAT status: %lu\n", | 875 | DEBUGP("NAT status: %lu\n", |
878 | status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); | 876 | status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); |
879 | 877 | ||
880 | if (ip_nat_initialized(ct, hooknum)) | 878 | if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) |
881 | return -EEXIST; | 879 | return -EEXIST; |
882 | ip_nat_setup_info(ct, &range, hooknum); | 880 | ip_nat_setup_info(ct, &range, hooknum); |
883 | 881 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index aeb7353d4777..e7fa29e576dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c | |||
@@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s, | |||
341 | static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, | 341 | static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, |
342 | const struct ip_conntrack *ct) | 342 | const struct ip_conntrack *ct) |
343 | { | 343 | { |
344 | struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); | 344 | struct nfattr *nest_parms; |
345 | 345 | ||
346 | read_lock_bh(&tcp_lock); | 346 | read_lock_bh(&tcp_lock); |
347 | nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); | ||
347 | NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), | 348 | NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), |
348 | &ct->proto.tcp.state); | 349 | &ct->proto.tcp.state); |
349 | read_unlock_bh(&tcp_lock); | 350 | read_unlock_bh(&tcp_lock); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 029c70dfb585..b7325e0b406a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk) | |||
262 | * We are working here with either a clone of the original | 262 | * We are working here with either a clone of the original |
263 | * SKB, or a fresh unique copy made by the retransmit engine. | 263 | * SKB, or a fresh unique copy made by the retransmit engine. |
264 | */ | 264 | */ |
265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | 265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask) |
266 | { | 266 | { |
267 | if (skb != NULL) { | 267 | const struct inet_connection_sock *icsk = inet_csk(sk); |
268 | const struct inet_connection_sock *icsk = inet_csk(sk); | 268 | struct inet_sock *inet; |
269 | struct inet_sock *inet = inet_sk(sk); | 269 | struct tcp_sock *tp; |
270 | struct tcp_sock *tp = tcp_sk(sk); | 270 | struct tcp_skb_cb *tcb; |
271 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 271 | int tcp_header_size; |
272 | int tcp_header_size = tp->tcp_header_len; | 272 | struct tcphdr *th; |
273 | struct tcphdr *th; | 273 | int sysctl_flags; |
274 | int sysctl_flags; | 274 | int err; |
275 | int err; | 275 | |
276 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | ||
277 | |||
278 | /* If congestion control is doing timestamping, we must | ||
279 | * take such a timestamp before we potentially clone/copy. | ||
280 | */ | ||
281 | if (icsk->icsk_ca_ops->rtt_sample) | ||
282 | __net_timestamp(skb); | ||
283 | |||
284 | if (likely(clone_it)) { | ||
285 | if (unlikely(skb_cloned(skb))) | ||
286 | skb = pskb_copy(skb, gfp_mask); | ||
287 | else | ||
288 | skb = skb_clone(skb, gfp_mask); | ||
289 | if (unlikely(!skb)) | ||
290 | return -ENOBUFS; | ||
291 | } | ||
276 | 292 | ||
277 | BUG_ON(!tcp_skb_pcount(skb)); | 293 | inet = inet_sk(sk); |
294 | tp = tcp_sk(sk); | ||
295 | tcb = TCP_SKB_CB(skb); | ||
296 | tcp_header_size = tp->tcp_header_len; | ||
278 | 297 | ||
279 | #define SYSCTL_FLAG_TSTAMPS 0x1 | 298 | #define SYSCTL_FLAG_TSTAMPS 0x1 |
280 | #define SYSCTL_FLAG_WSCALE 0x2 | 299 | #define SYSCTL_FLAG_WSCALE 0x2 |
281 | #define SYSCTL_FLAG_SACK 0x4 | 300 | #define SYSCTL_FLAG_SACK 0x4 |
282 | 301 | ||
283 | /* If congestion control is doing timestamping */ | 302 | sysctl_flags = 0; |
284 | if (icsk->icsk_ca_ops->rtt_sample) | 303 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { |
285 | __net_timestamp(skb); | 304 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; |
286 | 305 | if(sysctl_tcp_timestamps) { | |
287 | sysctl_flags = 0; | 306 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; |
288 | if (tcb->flags & TCPCB_FLAG_SYN) { | 307 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; |
289 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; | ||
290 | if(sysctl_tcp_timestamps) { | ||
291 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; | ||
292 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; | ||
293 | } | ||
294 | if(sysctl_tcp_window_scaling) { | ||
295 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; | ||
296 | sysctl_flags |= SYSCTL_FLAG_WSCALE; | ||
297 | } | ||
298 | if(sysctl_tcp_sack) { | ||
299 | sysctl_flags |= SYSCTL_FLAG_SACK; | ||
300 | if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | ||
301 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; | ||
302 | } | ||
303 | } else if (tp->rx_opt.eff_sacks) { | ||
304 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
305 | * 2 32-bit sequence numbers for each SACK block. | ||
306 | */ | ||
307 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
308 | (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); | ||
309 | } | 308 | } |
310 | 309 | if (sysctl_tcp_window_scaling) { | |
311 | if (tcp_packets_in_flight(tp) == 0) | 310 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; |
312 | tcp_ca_event(sk, CA_EVENT_TX_START); | 311 | sysctl_flags |= SYSCTL_FLAG_WSCALE; |
313 | |||
314 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | ||
315 | skb->h.th = th; | ||
316 | skb_set_owner_w(skb, sk); | ||
317 | |||
318 | /* Build TCP header and checksum it. */ | ||
319 | th->source = inet->sport; | ||
320 | th->dest = inet->dport; | ||
321 | th->seq = htonl(tcb->seq); | ||
322 | th->ack_seq = htonl(tp->rcv_nxt); | ||
323 | *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); | ||
324 | if (tcb->flags & TCPCB_FLAG_SYN) { | ||
325 | /* RFC1323: The window in SYN & SYN/ACK segments | ||
326 | * is never scaled. | ||
327 | */ | ||
328 | th->window = htons(tp->rcv_wnd); | ||
329 | } else { | ||
330 | th->window = htons(tcp_select_window(sk)); | ||
331 | } | 312 | } |
332 | th->check = 0; | 313 | if (sysctl_tcp_sack) { |
333 | th->urg_ptr = 0; | 314 | sysctl_flags |= SYSCTL_FLAG_SACK; |
334 | 315 | if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | |
335 | if (tp->urg_mode && | 316 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; |
336 | between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) { | ||
337 | th->urg_ptr = htons(tp->snd_up-tcb->seq); | ||
338 | th->urg = 1; | ||
339 | } | 317 | } |
318 | } else if (unlikely(tp->rx_opt.eff_sacks)) { | ||
319 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
320 | * 2 32-bit sequence numbers for each SACK block. | ||
321 | */ | ||
322 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
323 | (tp->rx_opt.eff_sacks * | ||
324 | TCPOLEN_SACK_PERBLOCK)); | ||
325 | } | ||
326 | |||
327 | if (tcp_packets_in_flight(tp) == 0) | ||
328 | tcp_ca_event(sk, CA_EVENT_TX_START); | ||
329 | |||
330 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | ||
331 | skb->h.th = th; | ||
332 | skb_set_owner_w(skb, sk); | ||
333 | |||
334 | /* Build TCP header and checksum it. */ | ||
335 | th->source = inet->sport; | ||
336 | th->dest = inet->dport; | ||
337 | th->seq = htonl(tcb->seq); | ||
338 | th->ack_seq = htonl(tp->rcv_nxt); | ||
339 | *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | | ||
340 | tcb->flags); | ||
341 | |||
342 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | ||
343 | /* RFC1323: The window in SYN & SYN/ACK segments | ||
344 | * is never scaled. | ||
345 | */ | ||
346 | th->window = htons(tp->rcv_wnd); | ||
347 | } else { | ||
348 | th->window = htons(tcp_select_window(sk)); | ||
349 | } | ||
350 | th->check = 0; | ||
351 | th->urg_ptr = 0; | ||
340 | 352 | ||
341 | if (tcb->flags & TCPCB_FLAG_SYN) { | 353 | if (unlikely(tp->urg_mode && |
342 | tcp_syn_build_options((__u32 *)(th + 1), | 354 | between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { |
343 | tcp_advertise_mss(sk), | 355 | th->urg_ptr = htons(tp->snd_up-tcb->seq); |
344 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | 356 | th->urg = 1; |
345 | (sysctl_flags & SYSCTL_FLAG_SACK), | 357 | } |
346 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
347 | tp->rx_opt.rcv_wscale, | ||
348 | tcb->when, | ||
349 | tp->rx_opt.ts_recent); | ||
350 | } else { | ||
351 | tcp_build_and_update_options((__u32 *)(th + 1), | ||
352 | tp, tcb->when); | ||
353 | 358 | ||
354 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | 359 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { |
355 | } | 360 | tcp_syn_build_options((__u32 *)(th + 1), |
356 | tp->af_specific->send_check(sk, th, skb->len, skb); | 361 | tcp_advertise_mss(sk), |
362 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | ||
363 | (sysctl_flags & SYSCTL_FLAG_SACK), | ||
364 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
365 | tp->rx_opt.rcv_wscale, | ||
366 | tcb->when, | ||
367 | tp->rx_opt.ts_recent); | ||
368 | } else { | ||
369 | tcp_build_and_update_options((__u32 *)(th + 1), | ||
370 | tp, tcb->when); | ||
371 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | ||
372 | } | ||
357 | 373 | ||
358 | if (tcb->flags & TCPCB_FLAG_ACK) | 374 | tp->af_specific->send_check(sk, th, skb->len, skb); |
359 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); | ||
360 | 375 | ||
361 | if (skb->len != tcp_header_size) | 376 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) |
362 | tcp_event_data_sent(tp, skb, sk); | 377 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
363 | 378 | ||
364 | TCP_INC_STATS(TCP_MIB_OUTSEGS); | 379 | if (skb->len != tcp_header_size) |
380 | tcp_event_data_sent(tp, skb, sk); | ||
365 | 381 | ||
366 | err = tp->af_specific->queue_xmit(skb, 0); | 382 | TCP_INC_STATS(TCP_MIB_OUTSEGS); |
367 | if (err <= 0) | ||
368 | return err; | ||
369 | 383 | ||
370 | tcp_enter_cwr(sk); | 384 | err = tp->af_specific->queue_xmit(skb, 0); |
385 | if (unlikely(err <= 0)) | ||
386 | return err; | ||
387 | |||
388 | tcp_enter_cwr(sk); | ||
389 | |||
390 | /* NET_XMIT_CN is special. It does not guarantee, | ||
391 | * that this packet is lost. It tells that device | ||
392 | * is about to start to drop packets or already | ||
393 | * drops some packets of the same priority and | ||
394 | * invokes us to send less aggressively. | ||
395 | */ | ||
396 | return err == NET_XMIT_CN ? 0 : err; | ||
371 | 397 | ||
372 | /* NET_XMIT_CN is special. It does not guarantee, | ||
373 | * that this packet is lost. It tells that device | ||
374 | * is about to start to drop packets or already | ||
375 | * drops some packets of the same priority and | ||
376 | * invokes us to send less aggressively. | ||
377 | */ | ||
378 | return err == NET_XMIT_CN ? 0 : err; | ||
379 | } | ||
380 | return -ENOBUFS; | ||
381 | #undef SYSCTL_FLAG_TSTAMPS | 398 | #undef SYSCTL_FLAG_TSTAMPS |
382 | #undef SYSCTL_FLAG_WSCALE | 399 | #undef SYSCTL_FLAG_WSCALE |
383 | #undef SYSCTL_FLAG_SACK | 400 | #undef SYSCTL_FLAG_SACK |
@@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1036 | 1053 | ||
1037 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1054 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1038 | 1055 | ||
1039 | if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) | 1056 | if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) |
1040 | break; | 1057 | break; |
1041 | 1058 | ||
1042 | /* Advance the send_head. This one is sent out. | 1059 | /* Advance the send_head. This one is sent out. |
@@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1109 | /* Send it out now. */ | 1126 | /* Send it out now. */ |
1110 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1127 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1111 | 1128 | ||
1112 | if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) { | 1129 | if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { |
1113 | update_send_head(sk, tp, skb); | 1130 | update_send_head(sk, tp, skb); |
1114 | tcp_cwnd_validate(sk, tp); | 1131 | tcp_cwnd_validate(sk, tp); |
1115 | return; | 1132 | return; |
@@ -1429,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1429 | */ | 1446 | */ |
1430 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1447 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1431 | 1448 | ||
1432 | err = tcp_transmit_skb(sk, (skb_cloned(skb) ? | 1449 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
1433 | pskb_copy(skb, GFP_ATOMIC): | ||
1434 | skb_clone(skb, GFP_ATOMIC))); | ||
1435 | 1450 | ||
1436 | if (err == 0) { | 1451 | if (err == 0) { |
1437 | /* Update global TCP statistics. */ | 1452 | /* Update global TCP statistics. */ |
@@ -1665,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
1665 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); | 1680 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); |
1666 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; | 1681 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; |
1667 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1682 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1668 | if (tcp_transmit_skb(sk, skb)) | 1683 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
1669 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); | 1684 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); |
1670 | } | 1685 | } |
1671 | 1686 | ||
@@ -1700,7 +1715,7 @@ int tcp_send_synack(struct sock *sk) | |||
1700 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 1715 | TCP_ECN_send_synack(tcp_sk(sk), skb); |
1701 | } | 1716 | } |
1702 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1717 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
1703 | return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); | 1718 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
1704 | } | 1719 | } |
1705 | 1720 | ||
1706 | /* | 1721 | /* |
@@ -1861,7 +1876,7 @@ int tcp_connect(struct sock *sk) | |||
1861 | __skb_queue_tail(&sk->sk_write_queue, buff); | 1876 | __skb_queue_tail(&sk->sk_write_queue, buff); |
1862 | sk_charge_skb(sk, buff); | 1877 | sk_charge_skb(sk, buff); |
1863 | tp->packets_out += tcp_skb_pcount(buff); | 1878 | tp->packets_out += tcp_skb_pcount(buff); |
1864 | tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); | 1879 | tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); |
1865 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); | 1880 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); |
1866 | 1881 | ||
1867 | /* Timer for repeating the SYN until an answer. */ | 1882 | /* Timer for repeating the SYN until an answer. */ |
@@ -1957,7 +1972,7 @@ void tcp_send_ack(struct sock *sk) | |||
1957 | /* Send it off, this clears delayed acks for us. */ | 1972 | /* Send it off, this clears delayed acks for us. */ |
1958 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); | 1973 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); |
1959 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 1974 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
1960 | tcp_transmit_skb(sk, buff); | 1975 | tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); |
1961 | } | 1976 | } |
1962 | } | 1977 | } |
1963 | 1978 | ||
@@ -1997,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
1997 | TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; | 2012 | TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; |
1998 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; | 2013 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; |
1999 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2014 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2000 | return tcp_transmit_skb(sk, skb); | 2015 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2001 | } | 2016 | } |
2002 | 2017 | ||
2003 | int tcp_write_wakeup(struct sock *sk) | 2018 | int tcp_write_wakeup(struct sock *sk) |
@@ -2030,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2030 | 2045 | ||
2031 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2046 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; |
2032 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2047 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2033 | err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); | 2048 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2034 | if (!err) { | 2049 | if (!err) { |
2035 | update_send_head(sk, tp, skb); | 2050 | update_send_head(sk, tp, skb); |
2036 | } | 2051 | } |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index b7d296a8ac6d..13e7e6e8df16 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
215 | vegas->beg_snd_nxt = tp->snd_nxt; | 215 | vegas->beg_snd_nxt = tp->snd_nxt; |
216 | vegas->beg_snd_cwnd = tp->snd_cwnd; | 216 | vegas->beg_snd_cwnd = tp->snd_cwnd; |
217 | 217 | ||
218 | /* Take into account the current RTT sample too, to | ||
219 | * decrease the impact of delayed acks. This double counts | ||
220 | * this sample since we count it for the next window as well, | ||
221 | * but that's not too awful, since we're taking the min, | ||
222 | * rather than averaging. | ||
223 | */ | ||
224 | tcp_vegas_rtt_calc(sk, seq_rtt * 1000); | ||
225 | |||
226 | /* We do the Vegas calculations only if we got enough RTT | 218 | /* We do the Vegas calculations only if we got enough RTT |
227 | * samples that we can be reasonably sure that we got | 219 | * samples that we can be reasonably sure that we got |
228 | * at least one RTT sample that wasn't from a delayed ACK. | 220 | * at least one RTT sample that wasn't from a delayed ACK. |
@@ -333,11 +325,11 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
333 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) | 325 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) |
334 | tp->snd_cwnd = tp->snd_cwnd_clamp; | 326 | tp->snd_cwnd = tp->snd_cwnd_clamp; |
335 | } | 327 | } |
336 | } | ||
337 | 328 | ||
338 | /* Wipe the slate clean for the next RTT. */ | 329 | /* Wipe the slate clean for the next RTT. */ |
339 | vegas->cntRTT = 0; | 330 | vegas->cntRTT = 0; |
340 | vegas->minRTT = 0x7fffffff; | 331 | vegas->minRTT = 0x7fffffff; |
332 | } | ||
341 | } | 333 | } |
342 | 334 | ||
343 | /* Extract info for Tcp socket info provided via netlink. */ | 335 | /* Extract info for Tcp socket info provided via netlink. */ |
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 40d9a1935ab5..8bfbe9970793 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
@@ -248,7 +248,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) | |||
248 | if (esp->conf.padlen) | 248 | if (esp->conf.padlen) |
249 | mtu = ALIGN(mtu, esp->conf.padlen); | 249 | mtu = ALIGN(mtu, esp->conf.padlen); |
250 | 250 | ||
251 | return mtu + x->props.header_len + esp->auth.icv_full_len; | 251 | return mtu + x->props.header_len + esp->auth.icv_trunc_len; |
252 | } | 252 | } |
253 | 253 | ||
254 | static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 254 | static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c0f1da5497a9..a7e03cfacd06 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | |||
@@ -68,8 +68,8 @@ static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, | |||
68 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 | 68 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 |
69 | }; | 69 | }; |
70 | 70 | ||
71 | __u8 type = orig->dst.u.icmp.type - 128; | 71 | int type = orig->dst.u.icmp.type - 128; |
72 | if (type >= sizeof(invmap) || !invmap[type]) | 72 | if (type < 0 || type >= sizeof(invmap) || !invmap[type]) |
73 | return 0; | 73 | return 0; |
74 | 74 | ||
75 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | 75 | tuple->src.u.icmp.id = orig->src.u.icmp.id; |
@@ -129,12 +129,12 @@ static int icmpv6_new(struct nf_conn *conntrack, | |||
129 | [ICMPV6_ECHO_REQUEST - 128] = 1, | 129 | [ICMPV6_ECHO_REQUEST - 128] = 1, |
130 | [ICMPV6_NI_QUERY - 128] = 1 | 130 | [ICMPV6_NI_QUERY - 128] = 1 |
131 | }; | 131 | }; |
132 | int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128; | ||
132 | 133 | ||
133 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) | 134 | if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { |
134 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) { | ||
135 | /* Can't create a new ICMPv6 `conn' with this. */ | 135 | /* Can't create a new ICMPv6 `conn' with this. */ |
136 | DEBUGP("icmp: can't create new conn with type %u\n", | 136 | DEBUGP("icmpv6: can't create new conn with type %u\n", |
137 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | 137 | type + 128); |
138 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | 138 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); |
139 | return 0; | 139 | return 0; |
140 | } | 140 | } |
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a84f9221e5f0..794c41d19b28 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig | |||
@@ -61,8 +61,8 @@ config NF_CONNTRACK_MARK | |||
61 | instead of the individual packets. | 61 | instead of the individual packets. |
62 | 62 | ||
63 | config NF_CONNTRACK_EVENTS | 63 | config NF_CONNTRACK_EVENTS |
64 | bool "Connection tracking events" | 64 | bool "Connection tracking events (EXPERIMENTAL)" |
65 | depends on NF_CONNTRACK | 65 | depends on EXPERIMENTAL && NF_CONNTRACK |
66 | help | 66 | help |
67 | If this option is enabled, the connection tracking code will | 67 | If this option is enabled, the connection tracking code will |
68 | provide a notifier chain that can be used by other kernel code | 68 | provide a notifier chain that can be used by other kernel code |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1da678303d78..a7c7b490cf22 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -1383,6 +1383,9 @@ void nf_conntrack_cleanup(void) | |||
1383 | schedule(); | 1383 | schedule(); |
1384 | goto i_see_dead_people; | 1384 | goto i_see_dead_people; |
1385 | } | 1385 | } |
1386 | /* wait until all references to nf_conntrack_untracked are dropped */ | ||
1387 | while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) | ||
1388 | schedule(); | ||
1386 | 1389 | ||
1387 | for (i = 0; i < NF_CT_F_NUM; i++) { | 1390 | for (i = 0; i < NF_CT_F_NUM; i++) { |
1388 | if (nf_ct_cache[i].use == 0) | 1391 | if (nf_ct_cache[i].use == 0) |
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a60c59b97631..95fdf04f1d88 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c | |||
@@ -162,7 +162,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, | |||
162 | return -EINVAL; | 162 | return -EINVAL; |
163 | } | 163 | } |
164 | 164 | ||
165 | min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); | 165 | min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); |
166 | if (unlikely(nlh->nlmsg_len < min_len)) | 166 | if (unlikely(nlh->nlmsg_len < min_len)) |
167 | return -EINVAL; | 167 | return -EINVAL; |
168 | 168 | ||
@@ -236,8 +236,7 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, | |||
236 | } | 236 | } |
237 | 237 | ||
238 | /* All the messages must at least contain nfgenmsg */ | 238 | /* All the messages must at least contain nfgenmsg */ |
239 | if (nlh->nlmsg_len < | 239 | if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) { |
240 | NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) { | ||
241 | DEBUGP("received message was too short\n"); | 240 | DEBUGP("received message was too short\n"); |
242 | return 0; | 241 | return 0; |
243 | } | 242 | } |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 499ae3df4a44..3e2462760413 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -1587,23 +1587,47 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) | |||
1587 | return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); | 1587 | return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | static void free_pg_vec(char **pg_vec, unsigned order, unsigned len) | 1590 | static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) |
1591 | { | 1591 | { |
1592 | int i; | 1592 | int i; |
1593 | 1593 | ||
1594 | for (i=0; i<len; i++) { | 1594 | for (i = 0; i < len; i++) { |
1595 | if (pg_vec[i]) { | 1595 | if (likely(pg_vec[i])) |
1596 | struct page *page, *pend; | 1596 | free_pages((unsigned long) pg_vec[i], order); |
1597 | |||
1598 | pend = pg_vec_endpage(pg_vec[i], order); | ||
1599 | for (page = virt_to_page(pg_vec[i]); page <= pend; page++) | ||
1600 | ClearPageReserved(page); | ||
1601 | free_pages((unsigned long)pg_vec[i], order); | ||
1602 | } | ||
1603 | } | 1597 | } |
1604 | kfree(pg_vec); | 1598 | kfree(pg_vec); |
1605 | } | 1599 | } |
1606 | 1600 | ||
1601 | static inline char *alloc_one_pg_vec_page(unsigned long order) | ||
1602 | { | ||
1603 | return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, | ||
1604 | order); | ||
1605 | } | ||
1606 | |||
1607 | static char **alloc_pg_vec(struct tpacket_req *req, int order) | ||
1608 | { | ||
1609 | unsigned int block_nr = req->tp_block_nr; | ||
1610 | char **pg_vec; | ||
1611 | int i; | ||
1612 | |||
1613 | pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); | ||
1614 | if (unlikely(!pg_vec)) | ||
1615 | goto out; | ||
1616 | |||
1617 | for (i = 0; i < block_nr; i++) { | ||
1618 | pg_vec[i] = alloc_one_pg_vec_page(order); | ||
1619 | if (unlikely(!pg_vec[i])) | ||
1620 | goto out_free_pgvec; | ||
1621 | } | ||
1622 | |||
1623 | out: | ||
1624 | return pg_vec; | ||
1625 | |||
1626 | out_free_pgvec: | ||
1627 | free_pg_vec(pg_vec, order, block_nr); | ||
1628 | pg_vec = NULL; | ||
1629 | goto out; | ||
1630 | } | ||
1607 | 1631 | ||
1608 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) | 1632 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) |
1609 | { | 1633 | { |
@@ -1617,64 +1641,46 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing | |||
1617 | 1641 | ||
1618 | /* Sanity tests and some calculations */ | 1642 | /* Sanity tests and some calculations */ |
1619 | 1643 | ||
1620 | if (po->pg_vec) | 1644 | if (unlikely(po->pg_vec)) |
1621 | return -EBUSY; | 1645 | return -EBUSY; |
1622 | 1646 | ||
1623 | if ((int)req->tp_block_size <= 0) | 1647 | if (unlikely((int)req->tp_block_size <= 0)) |
1624 | return -EINVAL; | 1648 | return -EINVAL; |
1625 | if (req->tp_block_size&(PAGE_SIZE-1)) | 1649 | if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) |
1626 | return -EINVAL; | 1650 | return -EINVAL; |
1627 | if (req->tp_frame_size < TPACKET_HDRLEN) | 1651 | if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) |
1628 | return -EINVAL; | 1652 | return -EINVAL; |
1629 | if (req->tp_frame_size&(TPACKET_ALIGNMENT-1)) | 1653 | if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) |
1630 | return -EINVAL; | 1654 | return -EINVAL; |
1631 | 1655 | ||
1632 | po->frames_per_block = req->tp_block_size/req->tp_frame_size; | 1656 | po->frames_per_block = req->tp_block_size/req->tp_frame_size; |
1633 | if (po->frames_per_block <= 0) | 1657 | if (unlikely(po->frames_per_block <= 0)) |
1634 | return -EINVAL; | 1658 | return -EINVAL; |
1635 | if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr) | 1659 | if (unlikely((po->frames_per_block * req->tp_block_nr) != |
1660 | req->tp_frame_nr)) | ||
1636 | return -EINVAL; | 1661 | return -EINVAL; |
1637 | /* OK! */ | ||
1638 | |||
1639 | /* Allocate page vector */ | ||
1640 | while ((PAGE_SIZE<<order) < req->tp_block_size) | ||
1641 | order++; | ||
1642 | 1662 | ||
1643 | err = -ENOMEM; | 1663 | err = -ENOMEM; |
1644 | 1664 | order = get_order(req->tp_block_size); | |
1645 | pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL); | 1665 | pg_vec = alloc_pg_vec(req, order); |
1646 | if (pg_vec == NULL) | 1666 | if (unlikely(!pg_vec)) |
1647 | goto out; | 1667 | goto out; |
1648 | memset(pg_vec, 0, req->tp_block_nr*sizeof(char **)); | ||
1649 | |||
1650 | for (i=0; i<req->tp_block_nr; i++) { | ||
1651 | struct page *page, *pend; | ||
1652 | pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order); | ||
1653 | if (!pg_vec[i]) | ||
1654 | goto out_free_pgvec; | ||
1655 | |||
1656 | pend = pg_vec_endpage(pg_vec[i], order); | ||
1657 | for (page = virt_to_page(pg_vec[i]); page <= pend; page++) | ||
1658 | SetPageReserved(page); | ||
1659 | } | ||
1660 | /* Page vector is allocated */ | ||
1661 | 1668 | ||
1662 | l = 0; | 1669 | l = 0; |
1663 | for (i=0; i<req->tp_block_nr; i++) { | 1670 | for (i = 0; i < req->tp_block_nr; i++) { |
1664 | char *ptr = pg_vec[i]; | 1671 | char *ptr = pg_vec[i]; |
1665 | struct tpacket_hdr *header; | 1672 | struct tpacket_hdr *header; |
1666 | int k; | 1673 | int k; |
1667 | 1674 | ||
1668 | for (k=0; k<po->frames_per_block; k++) { | 1675 | for (k = 0; k < po->frames_per_block; k++) { |
1669 | 1676 | header = (struct tpacket_hdr *) ptr; | |
1670 | header = (struct tpacket_hdr*)ptr; | ||
1671 | header->tp_status = TP_STATUS_KERNEL; | 1677 | header->tp_status = TP_STATUS_KERNEL; |
1672 | ptr += req->tp_frame_size; | 1678 | ptr += req->tp_frame_size; |
1673 | } | 1679 | } |
1674 | } | 1680 | } |
1675 | /* Done */ | 1681 | /* Done */ |
1676 | } else { | 1682 | } else { |
1677 | if (req->tp_frame_nr) | 1683 | if (unlikely(req->tp_frame_nr)) |
1678 | return -EINVAL; | 1684 | return -EINVAL; |
1679 | } | 1685 | } |
1680 | 1686 | ||
@@ -1701,7 +1707,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing | |||
1701 | 1707 | ||
1702 | spin_lock_bh(&sk->sk_receive_queue.lock); | 1708 | spin_lock_bh(&sk->sk_receive_queue.lock); |
1703 | pg_vec = XC(po->pg_vec, pg_vec); | 1709 | pg_vec = XC(po->pg_vec, pg_vec); |
1704 | po->frame_max = req->tp_frame_nr-1; | 1710 | po->frame_max = (req->tp_frame_nr - 1); |
1705 | po->head = 0; | 1711 | po->head = 0; |
1706 | po->frame_size = req->tp_frame_size; | 1712 | po->frame_size = req->tp_frame_size; |
1707 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 1713 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
@@ -1728,7 +1734,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing | |||
1728 | 1734 | ||
1729 | release_sock(sk); | 1735 | release_sock(sk); |
1730 | 1736 | ||
1731 | out_free_pgvec: | ||
1732 | if (pg_vec) | 1737 | if (pg_vec) |
1733 | free_pg_vec(pg_vec, order, req->tp_block_nr); | 1738 | free_pg_vec(pg_vec, order, req->tp_block_nr); |
1734 | out: | 1739 | out: |
@@ -1755,17 +1760,19 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st | |||
1755 | if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) | 1760 | if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) |
1756 | goto out; | 1761 | goto out; |
1757 | 1762 | ||
1758 | atomic_inc(&po->mapped); | ||
1759 | start = vma->vm_start; | 1763 | start = vma->vm_start; |
1760 | err = -EAGAIN; | 1764 | for (i = 0; i < po->pg_vec_len; i++) { |
1761 | for (i=0; i<po->pg_vec_len; i++) { | 1765 | struct page *page = virt_to_page(po->pg_vec[i]); |
1762 | if (remap_pfn_range(vma, start, | 1766 | int pg_num; |
1763 | __pa(po->pg_vec[i]) >> PAGE_SHIFT, | 1767 | |
1764 | po->pg_vec_pages*PAGE_SIZE, | 1768 | for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) { |
1765 | vma->vm_page_prot)) | 1769 | err = vm_insert_page(vma, start, page); |
1766 | goto out; | 1770 | if (unlikely(err)) |
1767 | start += po->pg_vec_pages*PAGE_SIZE; | 1771 | goto out; |
1772 | start += PAGE_SIZE; | ||
1773 | } | ||
1768 | } | 1774 | } |
1775 | atomic_inc(&po->mapped); | ||
1769 | vma->vm_ops = &packet_mmap_ops; | 1776 | vma->vm_ops = &packet_mmap_ops; |
1770 | err = 0; | 1777 | err = 0; |
1771 | 1778 | ||