diff options
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/ip_gre.c | 2 | ||||
| -rw-r--r-- | net/ipv4/netfilter/Kconfig | 8 | ||||
| -rw-r--r-- | net/ipv4/netfilter/Makefile | 3 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 20 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ip_conntrack_netlink.c | 12 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 3 | ||||
| -rw-r--r-- | net/ipv4/netfilter/ip_nat_tftp.c | 5 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 233 | ||||
| -rw-r--r-- | net/ipv4/tcp_vegas.c | 16 | ||||
| -rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
10 files changed, 158 insertions, 145 deletions
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a4c347c3b8e3..46f9d9cf7a5f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
| @@ -618,7 +618,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
| 618 | 618 | ||
| 619 | skb->mac.raw = skb->nh.raw; | 619 | skb->mac.raw = skb->nh.raw; |
| 620 | skb->nh.raw = __pskb_pull(skb, offset); | 620 | skb->nh.raw = __pskb_pull(skb, offset); |
| 621 | skb_postpull_rcsum(skb, skb->mac.raw, offset); | 621 | skb_postpull_rcsum(skb, skb->h.raw, offset); |
| 622 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); | 622 | memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); |
| 623 | skb->pkt_type = PACKET_HOST; | 623 | skb->pkt_type = PACKET_HOST; |
| 624 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 624 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 0bc00528d888..88a60650e6b8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
| @@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK | |||
| 56 | instead of the individual packets. | 56 | instead of the individual packets. |
| 57 | 57 | ||
| 58 | config IP_NF_CONNTRACK_EVENTS | 58 | config IP_NF_CONNTRACK_EVENTS |
| 59 | bool "Connection tracking events" | 59 | bool "Connection tracking events (EXPERIMENTAL)" |
| 60 | depends on IP_NF_CONNTRACK | 60 | depends on EXPERIMENTAL && IP_NF_CONNTRACK |
| 61 | help | 61 | help |
| 62 | If this option is enabled, the connection tracking code will | 62 | If this option is enabled, the connection tracking code will |
| 63 | provide a notifier chain that can be used by other kernel code | 63 | provide a notifier chain that can be used by other kernel code |
| @@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS | |||
| 66 | IF unsure, say `N'. | 66 | IF unsure, say `N'. |
| 67 | 67 | ||
| 68 | config IP_NF_CONNTRACK_NETLINK | 68 | config IP_NF_CONNTRACK_NETLINK |
| 69 | tristate 'Connection tracking netlink interface' | 69 | tristate 'Connection tracking netlink interface (EXPERIMENTAL)' |
| 70 | depends on IP_NF_CONNTRACK && NETFILTER_NETLINK | 70 | depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK |
| 71 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m | 71 | depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m |
| 72 | help | 72 | help |
| 73 | This option enables support for a netlink-based userspace interface | 73 | This option enables support for a netlink-based userspace interface |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 058c48e258fc..d0a447e520a2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
| @@ -12,6 +12,7 @@ ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o | |||
| 12 | 12 | ||
| 13 | # connection tracking | 13 | # connection tracking |
| 14 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o | 14 | obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o |
| 15 | obj-$(CONFIG_IP_NF_NAT) += ip_nat.o | ||
| 15 | 16 | ||
| 16 | # conntrack netlink interface | 17 | # conntrack netlink interface |
| 17 | obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o | 18 | obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o |
| @@ -41,7 +42,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o | |||
| 41 | # the three instances of ip_tables | 42 | # the three instances of ip_tables |
| 42 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o | 43 | obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o |
| 43 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o | 44 | obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o |
| 44 | obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o | 45 | obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o |
| 45 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o | 46 | obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o |
| 46 | 47 | ||
| 47 | # matches | 48 | # matches |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 7a4ecddd597b..84c66dbfedaf 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
| @@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data) | |||
| 1345 | return 1; | 1345 | return 1; |
| 1346 | } | 1346 | } |
| 1347 | 1347 | ||
| 1348 | void ip_conntrack_flush(void) | ||
| 1349 | { | ||
| 1350 | ip_ct_iterate_cleanup(kill_all, NULL); | ||
| 1351 | } | ||
| 1352 | |||
| 1348 | static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) | 1353 | static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) |
| 1349 | { | 1354 | { |
| 1350 | if (vmalloced) | 1355 | if (vmalloced) |
| @@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) | |||
| 1354 | get_order(sizeof(struct list_head) * size)); | 1359 | get_order(sizeof(struct list_head) * size)); |
| 1355 | } | 1360 | } |
| 1356 | 1361 | ||
| 1357 | void ip_conntrack_flush(void) | 1362 | /* Mishearing the voices in his head, our hero wonders how he's |
| 1363 | supposed to kill the mall. */ | ||
| 1364 | void ip_conntrack_cleanup(void) | ||
| 1358 | { | 1365 | { |
| 1366 | ip_ct_attach = NULL; | ||
| 1367 | |||
| 1359 | /* This makes sure all current packets have passed through | 1368 | /* This makes sure all current packets have passed through |
| 1360 | netfilter framework. Roll on, two-stage module | 1369 | netfilter framework. Roll on, two-stage module |
| 1361 | delete... */ | 1370 | delete... */ |
| @@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void) | |||
| 1363 | 1372 | ||
| 1364 | ip_ct_event_cache_flush(); | 1373 | ip_ct_event_cache_flush(); |
| 1365 | i_see_dead_people: | 1374 | i_see_dead_people: |
| 1366 | ip_ct_iterate_cleanup(kill_all, NULL); | 1375 | ip_conntrack_flush(); |
| 1367 | if (atomic_read(&ip_conntrack_count) != 0) { | 1376 | if (atomic_read(&ip_conntrack_count) != 0) { |
| 1368 | schedule(); | 1377 | schedule(); |
| 1369 | goto i_see_dead_people; | 1378 | goto i_see_dead_people; |
| @@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void) | |||
| 1371 | /* wait until all references to ip_conntrack_untracked are dropped */ | 1380 | /* wait until all references to ip_conntrack_untracked are dropped */ |
| 1372 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) | 1381 | while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) |
| 1373 | schedule(); | 1382 | schedule(); |
| 1374 | } | ||
| 1375 | 1383 | ||
| 1376 | /* Mishearing the voices in his head, our hero wonders how he's | ||
| 1377 | supposed to kill the mall. */ | ||
| 1378 | void ip_conntrack_cleanup(void) | ||
| 1379 | { | ||
| 1380 | ip_ct_attach = NULL; | ||
| 1381 | ip_conntrack_flush(); | ||
| 1382 | kmem_cache_destroy(ip_conntrack_cachep); | 1384 | kmem_cache_destroy(ip_conntrack_cachep); |
| 1383 | kmem_cache_destroy(ip_conntrack_expect_cachep); | 1385 | kmem_cache_destroy(ip_conntrack_expect_cachep); |
| 1384 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, | 1386 | free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, |
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 3fce91bcc0ba..91fe8f2e38ff 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c | |||
| @@ -503,7 +503,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) | |||
| 503 | } | 503 | } |
| 504 | 504 | ||
| 505 | static const size_t cta_min_proto[CTA_PROTO_MAX] = { | 505 | static const size_t cta_min_proto[CTA_PROTO_MAX] = { |
| 506 | [CTA_PROTO_NUM-1] = sizeof(u_int16_t), | 506 | [CTA_PROTO_NUM-1] = sizeof(u_int8_t), |
| 507 | [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), | 507 | [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), |
| 508 | [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), | 508 | [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), |
| 509 | [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), | 509 | [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), |
| @@ -528,7 +528,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr, | |||
| 528 | 528 | ||
| 529 | if (!tb[CTA_PROTO_NUM-1]) | 529 | if (!tb[CTA_PROTO_NUM-1]) |
| 530 | return -EINVAL; | 530 | return -EINVAL; |
| 531 | tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); | 531 | tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); |
| 532 | 532 | ||
| 533 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); | 533 | proto = ip_conntrack_proto_find_get(tuple->dst.protonum); |
| 534 | 534 | ||
| @@ -728,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
| 728 | return -ENOENT; | 728 | return -ENOENT; |
| 729 | } | 729 | } |
| 730 | } | 730 | } |
| 731 | if (del_timer(&ct->timeout)) { | 731 | if (del_timer(&ct->timeout)) |
| 732 | ip_conntrack_put(ct); | ||
| 733 | ct->timeout.function((unsigned long)ct); | 732 | ct->timeout.function((unsigned long)ct); |
| 734 | return 0; | 733 | |
| 735 | } | ||
| 736 | ip_conntrack_put(ct); | 734 | ip_conntrack_put(ct); |
| 737 | DEBUGP("leaving\n"); | 735 | DEBUGP("leaving\n"); |
| 738 | 736 | ||
| @@ -877,7 +875,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) | |||
| 877 | DEBUGP("NAT status: %lu\n", | 875 | DEBUGP("NAT status: %lu\n", |
| 878 | status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); | 876 | status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); |
| 879 | 877 | ||
| 880 | if (ip_nat_initialized(ct, hooknum)) | 878 | if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) |
| 881 | return -EEXIST; | 879 | return -EEXIST; |
| 882 | ip_nat_setup_info(ct, &range, hooknum); | 880 | ip_nat_setup_info(ct, &range, hooknum); |
| 883 | 881 | ||
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index aeb7353d4777..e7fa29e576dc 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c | |||
| @@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s, | |||
| 341 | static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, | 341 | static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, |
| 342 | const struct ip_conntrack *ct) | 342 | const struct ip_conntrack *ct) |
| 343 | { | 343 | { |
| 344 | struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); | 344 | struct nfattr *nest_parms; |
| 345 | 345 | ||
| 346 | read_lock_bh(&tcp_lock); | 346 | read_lock_bh(&tcp_lock); |
| 347 | nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); | ||
| 347 | NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), | 348 | NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), |
| 348 | &ct->proto.tcp.state); | 349 | &ct->proto.tcp.state); |
| 349 | read_unlock_bh(&tcp_lock); | 350 | read_unlock_bh(&tcp_lock); |
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c index 2215317c76b7..43c3bd7c118e 100644 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ b/net/ipv4/netfilter/ip_nat_tftp.c | |||
| @@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb, | |||
| 42 | enum ip_conntrack_info ctinfo, | 42 | enum ip_conntrack_info ctinfo, |
| 43 | struct ip_conntrack_expect *exp) | 43 | struct ip_conntrack_expect *exp) |
| 44 | { | 44 | { |
| 45 | exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port; | 45 | struct ip_conntrack *ct = exp->master; |
| 46 | |||
| 47 | exp->saved_proto.udp.port | ||
| 48 | = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; | ||
| 46 | exp->dir = IP_CT_DIR_REPLY; | 49 | exp->dir = IP_CT_DIR_REPLY; |
| 47 | exp->expectfn = ip_nat_follow_master; | 50 | exp->expectfn = ip_nat_follow_master; |
| 48 | if (ip_conntrack_expect_related(exp) != 0) | 51 | if (ip_conntrack_expect_related(exp) != 0) |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 029c70dfb585..b7325e0b406a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk) | |||
| 262 | * We are working here with either a clone of the original | 262 | * We are working here with either a clone of the original |
| 263 | * SKB, or a fresh unique copy made by the retransmit engine. | 263 | * SKB, or a fresh unique copy made by the retransmit engine. |
| 264 | */ | 264 | */ |
| 265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) | 265 | static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask) |
| 266 | { | 266 | { |
| 267 | if (skb != NULL) { | 267 | const struct inet_connection_sock *icsk = inet_csk(sk); |
| 268 | const struct inet_connection_sock *icsk = inet_csk(sk); | 268 | struct inet_sock *inet; |
| 269 | struct inet_sock *inet = inet_sk(sk); | 269 | struct tcp_sock *tp; |
| 270 | struct tcp_sock *tp = tcp_sk(sk); | 270 | struct tcp_skb_cb *tcb; |
| 271 | struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); | 271 | int tcp_header_size; |
| 272 | int tcp_header_size = tp->tcp_header_len; | 272 | struct tcphdr *th; |
| 273 | struct tcphdr *th; | 273 | int sysctl_flags; |
| 274 | int sysctl_flags; | 274 | int err; |
| 275 | int err; | 275 | |
| 276 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | ||
| 277 | |||
| 278 | /* If congestion control is doing timestamping, we must | ||
| 279 | * take such a timestamp before we potentially clone/copy. | ||
| 280 | */ | ||
| 281 | if (icsk->icsk_ca_ops->rtt_sample) | ||
| 282 | __net_timestamp(skb); | ||
| 283 | |||
| 284 | if (likely(clone_it)) { | ||
| 285 | if (unlikely(skb_cloned(skb))) | ||
| 286 | skb = pskb_copy(skb, gfp_mask); | ||
| 287 | else | ||
| 288 | skb = skb_clone(skb, gfp_mask); | ||
| 289 | if (unlikely(!skb)) | ||
| 290 | return -ENOBUFS; | ||
| 291 | } | ||
| 276 | 292 | ||
| 277 | BUG_ON(!tcp_skb_pcount(skb)); | 293 | inet = inet_sk(sk); |
| 294 | tp = tcp_sk(sk); | ||
| 295 | tcb = TCP_SKB_CB(skb); | ||
| 296 | tcp_header_size = tp->tcp_header_len; | ||
| 278 | 297 | ||
| 279 | #define SYSCTL_FLAG_TSTAMPS 0x1 | 298 | #define SYSCTL_FLAG_TSTAMPS 0x1 |
| 280 | #define SYSCTL_FLAG_WSCALE 0x2 | 299 | #define SYSCTL_FLAG_WSCALE 0x2 |
| 281 | #define SYSCTL_FLAG_SACK 0x4 | 300 | #define SYSCTL_FLAG_SACK 0x4 |
| 282 | 301 | ||
| 283 | /* If congestion control is doing timestamping */ | 302 | sysctl_flags = 0; |
| 284 | if (icsk->icsk_ca_ops->rtt_sample) | 303 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { |
| 285 | __net_timestamp(skb); | 304 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; |
| 286 | 305 | if(sysctl_tcp_timestamps) { | |
| 287 | sysctl_flags = 0; | 306 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; |
| 288 | if (tcb->flags & TCPCB_FLAG_SYN) { | 307 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; |
| 289 | tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; | ||
| 290 | if(sysctl_tcp_timestamps) { | ||
| 291 | tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; | ||
| 292 | sysctl_flags |= SYSCTL_FLAG_TSTAMPS; | ||
| 293 | } | ||
| 294 | if(sysctl_tcp_window_scaling) { | ||
| 295 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; | ||
| 296 | sysctl_flags |= SYSCTL_FLAG_WSCALE; | ||
| 297 | } | ||
| 298 | if(sysctl_tcp_sack) { | ||
| 299 | sysctl_flags |= SYSCTL_FLAG_SACK; | ||
| 300 | if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | ||
| 301 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; | ||
| 302 | } | ||
| 303 | } else if (tp->rx_opt.eff_sacks) { | ||
| 304 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
| 305 | * 2 32-bit sequence numbers for each SACK block. | ||
| 306 | */ | ||
| 307 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
| 308 | (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); | ||
| 309 | } | 308 | } |
| 310 | 309 | if (sysctl_tcp_window_scaling) { | |
| 311 | if (tcp_packets_in_flight(tp) == 0) | 310 | tcp_header_size += TCPOLEN_WSCALE_ALIGNED; |
| 312 | tcp_ca_event(sk, CA_EVENT_TX_START); | 311 | sysctl_flags |= SYSCTL_FLAG_WSCALE; |
| 313 | |||
| 314 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | ||
| 315 | skb->h.th = th; | ||
| 316 | skb_set_owner_w(skb, sk); | ||
| 317 | |||
| 318 | /* Build TCP header and checksum it. */ | ||
| 319 | th->source = inet->sport; | ||
| 320 | th->dest = inet->dport; | ||
| 321 | th->seq = htonl(tcb->seq); | ||
| 322 | th->ack_seq = htonl(tp->rcv_nxt); | ||
| 323 | *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); | ||
| 324 | if (tcb->flags & TCPCB_FLAG_SYN) { | ||
| 325 | /* RFC1323: The window in SYN & SYN/ACK segments | ||
| 326 | * is never scaled. | ||
| 327 | */ | ||
| 328 | th->window = htons(tp->rcv_wnd); | ||
| 329 | } else { | ||
| 330 | th->window = htons(tcp_select_window(sk)); | ||
| 331 | } | 312 | } |
| 332 | th->check = 0; | 313 | if (sysctl_tcp_sack) { |
| 333 | th->urg_ptr = 0; | 314 | sysctl_flags |= SYSCTL_FLAG_SACK; |
| 334 | 315 | if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS)) | |
| 335 | if (tp->urg_mode && | 316 | tcp_header_size += TCPOLEN_SACKPERM_ALIGNED; |
| 336 | between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) { | ||
| 337 | th->urg_ptr = htons(tp->snd_up-tcb->seq); | ||
| 338 | th->urg = 1; | ||
| 339 | } | 317 | } |
| 318 | } else if (unlikely(tp->rx_opt.eff_sacks)) { | ||
| 319 | /* A SACK is 2 pad bytes, a 2 byte header, plus | ||
| 320 | * 2 32-bit sequence numbers for each SACK block. | ||
| 321 | */ | ||
| 322 | tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + | ||
| 323 | (tp->rx_opt.eff_sacks * | ||
| 324 | TCPOLEN_SACK_PERBLOCK)); | ||
| 325 | } | ||
| 326 | |||
| 327 | if (tcp_packets_in_flight(tp) == 0) | ||
| 328 | tcp_ca_event(sk, CA_EVENT_TX_START); | ||
| 329 | |||
| 330 | th = (struct tcphdr *) skb_push(skb, tcp_header_size); | ||
| 331 | skb->h.th = th; | ||
| 332 | skb_set_owner_w(skb, sk); | ||
| 333 | |||
| 334 | /* Build TCP header and checksum it. */ | ||
| 335 | th->source = inet->sport; | ||
| 336 | th->dest = inet->dport; | ||
| 337 | th->seq = htonl(tcb->seq); | ||
| 338 | th->ack_seq = htonl(tp->rcv_nxt); | ||
| 339 | *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | | ||
| 340 | tcb->flags); | ||
| 341 | |||
| 342 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | ||
| 343 | /* RFC1323: The window in SYN & SYN/ACK segments | ||
| 344 | * is never scaled. | ||
| 345 | */ | ||
| 346 | th->window = htons(tp->rcv_wnd); | ||
| 347 | } else { | ||
| 348 | th->window = htons(tcp_select_window(sk)); | ||
| 349 | } | ||
| 350 | th->check = 0; | ||
| 351 | th->urg_ptr = 0; | ||
| 340 | 352 | ||
| 341 | if (tcb->flags & TCPCB_FLAG_SYN) { | 353 | if (unlikely(tp->urg_mode && |
| 342 | tcp_syn_build_options((__u32 *)(th + 1), | 354 | between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { |
| 343 | tcp_advertise_mss(sk), | 355 | th->urg_ptr = htons(tp->snd_up-tcb->seq); |
| 344 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | 356 | th->urg = 1; |
| 345 | (sysctl_flags & SYSCTL_FLAG_SACK), | 357 | } |
| 346 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
| 347 | tp->rx_opt.rcv_wscale, | ||
| 348 | tcb->when, | ||
| 349 | tp->rx_opt.ts_recent); | ||
| 350 | } else { | ||
| 351 | tcp_build_and_update_options((__u32 *)(th + 1), | ||
| 352 | tp, tcb->when); | ||
| 353 | 358 | ||
| 354 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | 359 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { |
| 355 | } | 360 | tcp_syn_build_options((__u32 *)(th + 1), |
| 356 | tp->af_specific->send_check(sk, th, skb->len, skb); | 361 | tcp_advertise_mss(sk), |
| 362 | (sysctl_flags & SYSCTL_FLAG_TSTAMPS), | ||
| 363 | (sysctl_flags & SYSCTL_FLAG_SACK), | ||
| 364 | (sysctl_flags & SYSCTL_FLAG_WSCALE), | ||
| 365 | tp->rx_opt.rcv_wscale, | ||
| 366 | tcb->when, | ||
| 367 | tp->rx_opt.ts_recent); | ||
| 368 | } else { | ||
| 369 | tcp_build_and_update_options((__u32 *)(th + 1), | ||
| 370 | tp, tcb->when); | ||
| 371 | TCP_ECN_send(sk, tp, skb, tcp_header_size); | ||
| 372 | } | ||
| 357 | 373 | ||
| 358 | if (tcb->flags & TCPCB_FLAG_ACK) | 374 | tp->af_specific->send_check(sk, th, skb->len, skb); |
| 359 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); | ||
| 360 | 375 | ||
| 361 | if (skb->len != tcp_header_size) | 376 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) |
| 362 | tcp_event_data_sent(tp, skb, sk); | 377 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
| 363 | 378 | ||
| 364 | TCP_INC_STATS(TCP_MIB_OUTSEGS); | 379 | if (skb->len != tcp_header_size) |
| 380 | tcp_event_data_sent(tp, skb, sk); | ||
| 365 | 381 | ||
| 366 | err = tp->af_specific->queue_xmit(skb, 0); | 382 | TCP_INC_STATS(TCP_MIB_OUTSEGS); |
| 367 | if (err <= 0) | ||
| 368 | return err; | ||
| 369 | 383 | ||
| 370 | tcp_enter_cwr(sk); | 384 | err = tp->af_specific->queue_xmit(skb, 0); |
| 385 | if (unlikely(err <= 0)) | ||
| 386 | return err; | ||
| 387 | |||
| 388 | tcp_enter_cwr(sk); | ||
| 389 | |||
| 390 | /* NET_XMIT_CN is special. It does not guarantee, | ||
| 391 | * that this packet is lost. It tells that device | ||
| 392 | * is about to start to drop packets or already | ||
| 393 | * drops some packets of the same priority and | ||
| 394 | * invokes us to send less aggressively. | ||
| 395 | */ | ||
| 396 | return err == NET_XMIT_CN ? 0 : err; | ||
| 371 | 397 | ||
| 372 | /* NET_XMIT_CN is special. It does not guarantee, | ||
| 373 | * that this packet is lost. It tells that device | ||
| 374 | * is about to start to drop packets or already | ||
| 375 | * drops some packets of the same priority and | ||
| 376 | * invokes us to send less aggressively. | ||
| 377 | */ | ||
| 378 | return err == NET_XMIT_CN ? 0 : err; | ||
| 379 | } | ||
| 380 | return -ENOBUFS; | ||
| 381 | #undef SYSCTL_FLAG_TSTAMPS | 398 | #undef SYSCTL_FLAG_TSTAMPS |
| 382 | #undef SYSCTL_FLAG_WSCALE | 399 | #undef SYSCTL_FLAG_WSCALE |
| 383 | #undef SYSCTL_FLAG_SACK | 400 | #undef SYSCTL_FLAG_SACK |
| @@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
| 1036 | 1053 | ||
| 1037 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1054 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 1038 | 1055 | ||
| 1039 | if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) | 1056 | if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC))) |
| 1040 | break; | 1057 | break; |
| 1041 | 1058 | ||
| 1042 | /* Advance the send_head. This one is sent out. | 1059 | /* Advance the send_head. This one is sent out. |
| @@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
| 1109 | /* Send it out now. */ | 1126 | /* Send it out now. */ |
| 1110 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1127 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 1111 | 1128 | ||
| 1112 | if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) { | 1129 | if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { |
| 1113 | update_send_head(sk, tp, skb); | 1130 | update_send_head(sk, tp, skb); |
| 1114 | tcp_cwnd_validate(sk, tp); | 1131 | tcp_cwnd_validate(sk, tp); |
| 1115 | return; | 1132 | return; |
| @@ -1429,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1429 | */ | 1446 | */ |
| 1430 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1447 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 1431 | 1448 | ||
| 1432 | err = tcp_transmit_skb(sk, (skb_cloned(skb) ? | 1449 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
| 1433 | pskb_copy(skb, GFP_ATOMIC): | ||
| 1434 | skb_clone(skb, GFP_ATOMIC))); | ||
| 1435 | 1450 | ||
| 1436 | if (err == 0) { | 1451 | if (err == 0) { |
| 1437 | /* Update global TCP statistics. */ | 1452 | /* Update global TCP statistics. */ |
| @@ -1665,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
| 1665 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); | 1680 | TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); |
| 1666 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; | 1681 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; |
| 1667 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1682 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 1668 | if (tcp_transmit_skb(sk, skb)) | 1683 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
| 1669 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); | 1684 | NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); |
| 1670 | } | 1685 | } |
| 1671 | 1686 | ||
| @@ -1700,7 +1715,7 @@ int tcp_send_synack(struct sock *sk) | |||
| 1700 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 1715 | TCP_ECN_send_synack(tcp_sk(sk), skb); |
| 1701 | } | 1716 | } |
| 1702 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1717 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 1703 | return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); | 1718 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
| 1704 | } | 1719 | } |
| 1705 | 1720 | ||
| 1706 | /* | 1721 | /* |
| @@ -1861,7 +1876,7 @@ int tcp_connect(struct sock *sk) | |||
| 1861 | __skb_queue_tail(&sk->sk_write_queue, buff); | 1876 | __skb_queue_tail(&sk->sk_write_queue, buff); |
| 1862 | sk_charge_skb(sk, buff); | 1877 | sk_charge_skb(sk, buff); |
| 1863 | tp->packets_out += tcp_skb_pcount(buff); | 1878 | tp->packets_out += tcp_skb_pcount(buff); |
| 1864 | tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); | 1879 | tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); |
| 1865 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); | 1880 | TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); |
| 1866 | 1881 | ||
| 1867 | /* Timer for repeating the SYN until an answer. */ | 1882 | /* Timer for repeating the SYN until an answer. */ |
| @@ -1957,7 +1972,7 @@ void tcp_send_ack(struct sock *sk) | |||
| 1957 | /* Send it off, this clears delayed acks for us. */ | 1972 | /* Send it off, this clears delayed acks for us. */ |
| 1958 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); | 1973 | TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); |
| 1959 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 1974 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
| 1960 | tcp_transmit_skb(sk, buff); | 1975 | tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); |
| 1961 | } | 1976 | } |
| 1962 | } | 1977 | } |
| 1963 | 1978 | ||
| @@ -1997,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
| 1997 | TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; | 2012 | TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; |
| 1998 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; | 2013 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; |
| 1999 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2014 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2000 | return tcp_transmit_skb(sk, skb); | 2015 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
| 2001 | } | 2016 | } |
| 2002 | 2017 | ||
| 2003 | int tcp_write_wakeup(struct sock *sk) | 2018 | int tcp_write_wakeup(struct sock *sk) |
| @@ -2030,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
| 2030 | 2045 | ||
| 2031 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2046 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; |
| 2032 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2047 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
| 2033 | err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); | 2048 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
| 2034 | if (!err) { | 2049 | if (!err) { |
| 2035 | update_send_head(sk, tp, skb); | 2050 | update_send_head(sk, tp, skb); |
| 2036 | } | 2051 | } |
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index b7d296a8ac6d..13e7e6e8df16 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c | |||
| @@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
| 215 | vegas->beg_snd_nxt = tp->snd_nxt; | 215 | vegas->beg_snd_nxt = tp->snd_nxt; |
| 216 | vegas->beg_snd_cwnd = tp->snd_cwnd; | 216 | vegas->beg_snd_cwnd = tp->snd_cwnd; |
| 217 | 217 | ||
| 218 | /* Take into account the current RTT sample too, to | ||
| 219 | * decrease the impact of delayed acks. This double counts | ||
| 220 | * this sample since we count it for the next window as well, | ||
| 221 | * but that's not too awful, since we're taking the min, | ||
| 222 | * rather than averaging. | ||
| 223 | */ | ||
| 224 | tcp_vegas_rtt_calc(sk, seq_rtt * 1000); | ||
| 225 | |||
| 226 | /* We do the Vegas calculations only if we got enough RTT | 218 | /* We do the Vegas calculations only if we got enough RTT |
| 227 | * samples that we can be reasonably sure that we got | 219 | * samples that we can be reasonably sure that we got |
| 228 | * at least one RTT sample that wasn't from a delayed ACK. | 220 | * at least one RTT sample that wasn't from a delayed ACK. |
| @@ -333,11 +325,11 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, | |||
| 333 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) | 325 | else if (tp->snd_cwnd > tp->snd_cwnd_clamp) |
| 334 | tp->snd_cwnd = tp->snd_cwnd_clamp; | 326 | tp->snd_cwnd = tp->snd_cwnd_clamp; |
| 335 | } | 327 | } |
| 336 | } | ||
| 337 | 328 | ||
| 338 | /* Wipe the slate clean for the next RTT. */ | 329 | /* Wipe the slate clean for the next RTT. */ |
| 339 | vegas->cntRTT = 0; | 330 | vegas->cntRTT = 0; |
| 340 | vegas->minRTT = 0x7fffffff; | 331 | vegas->minRTT = 0x7fffffff; |
| 332 | } | ||
| 341 | } | 333 | } |
| 342 | 334 | ||
| 343 | /* Extract info for Tcp socket info provided via netlink. */ | 335 | /* Extract info for Tcp socket info provided via netlink. */ |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b2b60f3e9cdd..42196ba3b0b9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
| @@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) | |||
| 182 | case IPPROTO_UDP: | 182 | case IPPROTO_UDP: |
| 183 | case IPPROTO_TCP: | 183 | case IPPROTO_TCP: |
| 184 | case IPPROTO_SCTP: | 184 | case IPPROTO_SCTP: |
| 185 | case IPPROTO_DCCP: | ||
| 185 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { | 186 | if (pskb_may_pull(skb, xprth + 4 - skb->data)) { |
| 186 | u16 *ports = (u16 *)xprth; | 187 | u16 *ports = (u16 *)xprth; |
| 187 | 188 | ||
