diff options
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 123 |
1 files changed, 61 insertions, 62 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d096332f6c6d..534650cad3a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph) | |||
95 | /* dev_loopback_xmit for use with netfilter. */ | 95 | /* dev_loopback_xmit for use with netfilter. */ |
96 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) | 96 | static int ip_dev_loopback_xmit(struct sk_buff *newskb) |
97 | { | 97 | { |
98 | newskb->mac.raw = newskb->data; | 98 | skb_reset_mac_header(newskb); |
99 | __skb_pull(newskb, newskb->nh.raw - newskb->data); | 99 | __skb_pull(newskb, skb_network_offset(newskb)); |
100 | newskb->pkt_type = PACKET_LOOPBACK; | 100 | newskb->pkt_type = PACKET_LOOPBACK; |
101 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | 101 | newskb->ip_summed = CHECKSUM_UNNECESSARY; |
102 | BUG_TRAP(newskb->dst); | 102 | BUG_TRAP(newskb->dst); |
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
125 | struct iphdr *iph; | 125 | struct iphdr *iph; |
126 | 126 | ||
127 | /* Build the IP header. */ | 127 | /* Build the IP header. */ |
128 | if (opt) | 128 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); |
129 | iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); | 129 | skb_reset_network_header(skb); |
130 | else | 130 | iph = ip_hdr(skb); |
131 | iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr)); | ||
132 | |||
133 | iph->version = 4; | 131 | iph->version = 4; |
134 | iph->ihl = 5; | 132 | iph->ihl = 5; |
135 | iph->tos = inet->tos; | 133 | iph->tos = inet->tos; |
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
143 | iph->protocol = sk->sk_protocol; | 141 | iph->protocol = sk->sk_protocol; |
144 | iph->tot_len = htons(skb->len); | 142 | iph->tot_len = htons(skb->len); |
145 | ip_select_ident(iph, &rt->u.dst, sk); | 143 | ip_select_ident(iph, &rt->u.dst, sk); |
146 | skb->nh.iph = iph; | ||
147 | 144 | ||
148 | if (opt && opt->optlen) { | 145 | if (opt && opt->optlen) { |
149 | iph->ihl += opt->optlen>>2; | 146 | iph->ihl += opt->optlen>>2; |
@@ -192,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
192 | return -EINVAL; | 189 | return -EINVAL; |
193 | } | 190 | } |
194 | 191 | ||
192 | static inline int ip_skb_dst_mtu(struct sk_buff *skb) | ||
193 | { | ||
194 | struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL; | ||
195 | |||
196 | return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ? | ||
197 | skb->dst->dev->mtu : dst_mtu(skb->dst); | ||
198 | } | ||
199 | |||
195 | static inline int ip_finish_output(struct sk_buff *skb) | 200 | static inline int ip_finish_output(struct sk_buff *skb) |
196 | { | 201 | { |
197 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) | 202 | #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) |
@@ -201,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb) | |||
201 | return dst_output(skb); | 206 | return dst_output(skb); |
202 | } | 207 | } |
203 | #endif | 208 | #endif |
204 | if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) | 209 | if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) |
205 | return ip_fragment(skb, ip_finish_output2); | 210 | return ip_fragment(skb, ip_finish_output2); |
206 | else | 211 | else |
207 | return ip_finish_output2(skb); | 212 | return ip_finish_output2(skb); |
@@ -248,7 +253,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
248 | 253 | ||
249 | /* Multicasts with ttl 0 must not go beyond the host */ | 254 | /* Multicasts with ttl 0 must not go beyond the host */ |
250 | 255 | ||
251 | if (skb->nh.iph->ttl == 0) { | 256 | if (ip_hdr(skb)->ttl == 0) { |
252 | kfree_skb(skb); | 257 | kfree_skb(skb); |
253 | return 0; | 258 | return 0; |
254 | } | 259 | } |
@@ -333,7 +338,9 @@ packet_routed: | |||
333 | goto no_route; | 338 | goto no_route; |
334 | 339 | ||
335 | /* OK, we know where to send it, allocate and build IP header. */ | 340 | /* OK, we know where to send it, allocate and build IP header. */ |
336 | iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 341 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); |
342 | skb_reset_network_header(skb); | ||
343 | iph = ip_hdr(skb); | ||
337 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 344 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
338 | iph->tot_len = htons(skb->len); | 345 | iph->tot_len = htons(skb->len); |
339 | if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) | 346 | if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) |
@@ -344,7 +351,6 @@ packet_routed: | |||
344 | iph->protocol = sk->sk_protocol; | 351 | iph->protocol = sk->sk_protocol; |
345 | iph->saddr = rt->rt_src; | 352 | iph->saddr = rt->rt_src; |
346 | iph->daddr = rt->rt_dst; | 353 | iph->daddr = rt->rt_dst; |
347 | skb->nh.iph = iph; | ||
348 | /* Transport layer set skb->h.foo itself. */ | 354 | /* Transport layer set skb->h.foo itself. */ |
349 | 355 | ||
350 | if (opt && opt->optlen) { | 356 | if (opt && opt->optlen) { |
@@ -386,21 +392,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
386 | #ifdef CONFIG_NET_SCHED | 392 | #ifdef CONFIG_NET_SCHED |
387 | to->tc_index = from->tc_index; | 393 | to->tc_index = from->tc_index; |
388 | #endif | 394 | #endif |
389 | #ifdef CONFIG_NETFILTER | 395 | nf_copy(to, from); |
390 | /* Connection association is same as pre-frag packet */ | ||
391 | nf_conntrack_put(to->nfct); | ||
392 | to->nfct = from->nfct; | ||
393 | nf_conntrack_get(to->nfct); | ||
394 | to->nfctinfo = from->nfctinfo; | ||
395 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) | 396 | #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) |
396 | to->ipvs_property = from->ipvs_property; | 397 | to->ipvs_property = from->ipvs_property; |
397 | #endif | 398 | #endif |
398 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
399 | nf_bridge_put(to->nf_bridge); | ||
400 | to->nf_bridge = from->nf_bridge; | ||
401 | nf_bridge_get(to->nf_bridge); | ||
402 | #endif | ||
403 | #endif | ||
404 | skb_copy_secmark(to, from); | 399 | skb_copy_secmark(to, from); |
405 | } | 400 | } |
406 | 401 | ||
@@ -430,12 +425,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
430 | * Point into the IP datagram header. | 425 | * Point into the IP datagram header. |
431 | */ | 426 | */ |
432 | 427 | ||
433 | iph = skb->nh.iph; | 428 | iph = ip_hdr(skb); |
434 | 429 | ||
435 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { | 430 | if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { |
436 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); | 431 | IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); |
437 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 432 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
438 | htonl(dst_mtu(&rt->u.dst))); | 433 | htonl(ip_skb_dst_mtu(skb))); |
439 | kfree_skb(skb); | 434 | kfree_skb(skb); |
440 | return -EMSGSIZE; | 435 | return -EMSGSIZE; |
441 | } | 436 | } |
@@ -502,10 +497,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
502 | * before previous one went down. */ | 497 | * before previous one went down. */ |
503 | if (frag) { | 498 | if (frag) { |
504 | frag->ip_summed = CHECKSUM_NONE; | 499 | frag->ip_summed = CHECKSUM_NONE; |
505 | frag->h.raw = frag->data; | 500 | skb_reset_transport_header(frag); |
506 | frag->nh.raw = __skb_push(frag, hlen); | 501 | __skb_push(frag, hlen); |
507 | memcpy(frag->nh.raw, iph, hlen); | 502 | skb_reset_network_header(frag); |
508 | iph = frag->nh.iph; | 503 | memcpy(skb_network_header(frag), iph, hlen); |
504 | iph = ip_hdr(frag); | ||
509 | iph->tot_len = htons(frag->len); | 505 | iph->tot_len = htons(frag->len); |
510 | ip_copy_metadata(frag, skb); | 506 | ip_copy_metadata(frag, skb); |
511 | if (offset == 0) | 507 | if (offset == 0) |
@@ -566,7 +562,7 @@ slow_path: | |||
566 | * Keep copying data until we run out. | 562 | * Keep copying data until we run out. |
567 | */ | 563 | */ |
568 | 564 | ||
569 | while(left > 0) { | 565 | while (left > 0) { |
570 | len = left; | 566 | len = left; |
571 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 567 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
572 | if (len > mtu) | 568 | if (len > mtu) |
@@ -593,8 +589,8 @@ slow_path: | |||
593 | ip_copy_metadata(skb2, skb); | 589 | ip_copy_metadata(skb2, skb); |
594 | skb_reserve(skb2, ll_rs); | 590 | skb_reserve(skb2, ll_rs); |
595 | skb_put(skb2, len + hlen); | 591 | skb_put(skb2, len + hlen); |
596 | skb2->nh.raw = skb2->data; | 592 | skb_reset_network_header(skb2); |
597 | skb2->h.raw = skb2->data + hlen; | 593 | skb2->transport_header = skb2->network_header + hlen; |
598 | 594 | ||
599 | /* | 595 | /* |
600 | * Charge the memory for the fragment to any owner | 596 | * Charge the memory for the fragment to any owner |
@@ -608,19 +604,19 @@ slow_path: | |||
608 | * Copy the packet header into the new buffer. | 604 | * Copy the packet header into the new buffer. |
609 | */ | 605 | */ |
610 | 606 | ||
611 | memcpy(skb2->nh.raw, skb->data, hlen); | 607 | skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); |
612 | 608 | ||
613 | /* | 609 | /* |
614 | * Copy a block of the IP datagram. | 610 | * Copy a block of the IP datagram. |
615 | */ | 611 | */ |
616 | if (skb_copy_bits(skb, ptr, skb2->h.raw, len)) | 612 | if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) |
617 | BUG(); | 613 | BUG(); |
618 | left -= len; | 614 | left -= len; |
619 | 615 | ||
620 | /* | 616 | /* |
621 | * Fill in the new header fields. | 617 | * Fill in the new header fields. |
622 | */ | 618 | */ |
623 | iph = skb2->nh.iph; | 619 | iph = ip_hdr(skb2); |
624 | iph->frag_off = htons((offset >> 3)); | 620 | iph->frag_off = htons((offset >> 3)); |
625 | 621 | ||
626 | /* ANK: dirty, but effective trick. Upgrade options only if | 622 | /* ANK: dirty, but effective trick. Upgrade options only if |
@@ -722,10 +718,10 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
722 | skb_put(skb,fragheaderlen + transhdrlen); | 718 | skb_put(skb,fragheaderlen + transhdrlen); |
723 | 719 | ||
724 | /* initialize network header pointer */ | 720 | /* initialize network header pointer */ |
725 | skb->nh.raw = skb->data; | 721 | skb_reset_network_header(skb); |
726 | 722 | ||
727 | /* initialize protocol header pointer */ | 723 | /* initialize protocol header pointer */ |
728 | skb->h.raw = skb->data + fragheaderlen; | 724 | skb->transport_header = skb->network_header + fragheaderlen; |
729 | 725 | ||
730 | skb->ip_summed = CHECKSUM_PARTIAL; | 726 | skb->ip_summed = CHECKSUM_PARTIAL; |
731 | skb->csum = 0; | 727 | skb->csum = 0; |
@@ -799,7 +795,9 @@ int ip_append_data(struct sock *sk, | |||
799 | inet->cork.addr = ipc->addr; | 795 | inet->cork.addr = ipc->addr; |
800 | } | 796 | } |
801 | dst_hold(&rt->u.dst); | 797 | dst_hold(&rt->u.dst); |
802 | inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); | 798 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
799 | rt->u.dst.dev->mtu : | ||
800 | dst_mtu(rt->u.dst.path); | ||
803 | inet->cork.rt = rt; | 801 | inet->cork.rt = rt; |
804 | inet->cork.length = 0; | 802 | inet->cork.length = 0; |
805 | sk->sk_sndmsg_page = NULL; | 803 | sk->sk_sndmsg_page = NULL; |
@@ -929,9 +927,10 @@ alloc_new_skb: | |||
929 | * Find where to start putting bytes. | 927 | * Find where to start putting bytes. |
930 | */ | 928 | */ |
931 | data = skb_put(skb, fraglen); | 929 | data = skb_put(skb, fraglen); |
932 | skb->nh.raw = data + exthdrlen; | 930 | skb_set_network_header(skb, exthdrlen); |
931 | skb->transport_header = (skb->network_header + | ||
932 | fragheaderlen); | ||
933 | data += fragheaderlen; | 933 | data += fragheaderlen; |
934 | skb->h.raw = data + exthdrlen; | ||
935 | 934 | ||
936 | if (fraggap) { | 935 | if (fraggap) { |
937 | skb->csum = skb_copy_and_csum_bits( | 936 | skb->csum = skb_copy_and_csum_bits( |
@@ -1100,8 +1099,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1100 | } | 1099 | } |
1101 | if (len <= 0) { | 1100 | if (len <= 0) { |
1102 | struct sk_buff *skb_prev; | 1101 | struct sk_buff *skb_prev; |
1103 | char *data; | ||
1104 | struct iphdr *iph; | ||
1105 | int alloclen; | 1102 | int alloclen; |
1106 | 1103 | ||
1107 | skb_prev = skb; | 1104 | skb_prev = skb; |
@@ -1124,15 +1121,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1124 | /* | 1121 | /* |
1125 | * Find where to start putting bytes. | 1122 | * Find where to start putting bytes. |
1126 | */ | 1123 | */ |
1127 | data = skb_put(skb, fragheaderlen + fraggap); | 1124 | skb_put(skb, fragheaderlen + fraggap); |
1128 | skb->nh.iph = iph = (struct iphdr *)data; | 1125 | skb_reset_network_header(skb); |
1129 | data += fragheaderlen; | 1126 | skb->transport_header = (skb->network_header + |
1130 | skb->h.raw = data; | 1127 | fragheaderlen); |
1131 | |||
1132 | if (fraggap) { | 1128 | if (fraggap) { |
1133 | skb->csum = skb_copy_and_csum_bits( | 1129 | skb->csum = skb_copy_and_csum_bits(skb_prev, |
1134 | skb_prev, maxfraglen, | 1130 | maxfraglen, |
1135 | data, fraggap, 0); | 1131 | skb_transport_header(skb), |
1132 | fraggap, 0); | ||
1136 | skb_prev->csum = csum_sub(skb_prev->csum, | 1133 | skb_prev->csum = csum_sub(skb_prev->csum, |
1137 | skb->csum); | 1134 | skb->csum); |
1138 | pskb_trim_unique(skb_prev, maxfraglen); | 1135 | pskb_trim_unique(skb_prev, maxfraglen); |
@@ -1198,10 +1195,10 @@ int ip_push_pending_frames(struct sock *sk) | |||
1198 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1195 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1199 | 1196 | ||
1200 | /* move skb->data to ip header from ext header */ | 1197 | /* move skb->data to ip header from ext header */ |
1201 | if (skb->data < skb->nh.raw) | 1198 | if (skb->data < skb_network_header(skb)) |
1202 | __skb_pull(skb, skb->nh.raw - skb->data); | 1199 | __skb_pull(skb, skb_network_offset(skb)); |
1203 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1200 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { |
1204 | __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); | 1201 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1205 | *tail_skb = tmp_skb; | 1202 | *tail_skb = tmp_skb; |
1206 | tail_skb = &(tmp_skb->next); | 1203 | tail_skb = &(tmp_skb->next); |
1207 | skb->len += tmp_skb->len; | 1204 | skb->len += tmp_skb->len; |
@@ -1216,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk) | |||
1216 | * to fragment the frame generated here. No matter, what transforms | 1213 | * to fragment the frame generated here. No matter, what transforms |
1217 | * how transforms change size of the packet, it will come out. | 1214 | * how transforms change size of the packet, it will come out. |
1218 | */ | 1215 | */ |
1219 | if (inet->pmtudisc != IP_PMTUDISC_DO) | 1216 | if (inet->pmtudisc < IP_PMTUDISC_DO) |
1220 | skb->local_df = 1; | 1217 | skb->local_df = 1; |
1221 | 1218 | ||
1222 | /* DF bit is set when we want to see DF on outgoing frames. | 1219 | /* DF bit is set when we want to see DF on outgoing frames. |
1223 | * If local_df is set too, we still allow to fragment this frame | 1220 | * If local_df is set too, we still allow to fragment this frame |
1224 | * locally. */ | 1221 | * locally. */ |
1225 | if (inet->pmtudisc == IP_PMTUDISC_DO || | 1222 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
1226 | (skb->len <= dst_mtu(&rt->u.dst) && | 1223 | (skb->len <= dst_mtu(&rt->u.dst) && |
1227 | ip_dont_fragment(sk, &rt->u.dst))) | 1224 | ip_dont_fragment(sk, &rt->u.dst))) |
1228 | df = htons(IP_DF); | 1225 | df = htons(IP_DF); |
@@ -1352,11 +1349,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1352 | struct flowi fl = { .nl_u = { .ip4_u = | 1349 | struct flowi fl = { .nl_u = { .ip4_u = |
1353 | { .daddr = daddr, | 1350 | { .daddr = daddr, |
1354 | .saddr = rt->rt_spec_dst, | 1351 | .saddr = rt->rt_spec_dst, |
1355 | .tos = RT_TOS(skb->nh.iph->tos) } }, | 1352 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, |
1356 | /* Not quite clean, but right. */ | 1353 | /* Not quite clean, but right. */ |
1357 | .uli_u = { .ports = | 1354 | .uli_u = { .ports = |
1358 | { .sport = skb->h.th->dest, | 1355 | { .sport = tcp_hdr(skb)->dest, |
1359 | .dport = skb->h.th->source } }, | 1356 | .dport = tcp_hdr(skb)->source } }, |
1360 | .proto = sk->sk_protocol }; | 1357 | .proto = sk->sk_protocol }; |
1361 | security_skb_classify_flow(skb, &fl); | 1358 | security_skb_classify_flow(skb, &fl); |
1362 | if (ip_route_output_key(&rt, &fl)) | 1359 | if (ip_route_output_key(&rt, &fl)) |
@@ -1370,14 +1367,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1370 | with locally disabled BH and that sk cannot be already spinlocked. | 1367 | with locally disabled BH and that sk cannot be already spinlocked. |
1371 | */ | 1368 | */ |
1372 | bh_lock_sock(sk); | 1369 | bh_lock_sock(sk); |
1373 | inet->tos = skb->nh.iph->tos; | 1370 | inet->tos = ip_hdr(skb)->tos; |
1374 | sk->sk_priority = skb->priority; | 1371 | sk->sk_priority = skb->priority; |
1375 | sk->sk_protocol = skb->nh.iph->protocol; | 1372 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1376 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1373 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
1377 | &ipc, rt, MSG_DONTWAIT); | 1374 | &ipc, rt, MSG_DONTWAIT); |
1378 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1375 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
1379 | if (arg->csumoffset >= 0) | 1376 | if (arg->csumoffset >= 0) |
1380 | *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); | 1377 | *((__sum16 *)skb_transport_header(skb) + |
1378 | arg->csumoffset) = csum_fold(csum_add(skb->csum, | ||
1379 | arg->csum)); | ||
1381 | skb->ip_summed = CHECKSUM_NONE; | 1380 | skb->ip_summed = CHECKSUM_NONE; |
1382 | ip_push_pending_frames(sk); | 1381 | ip_push_pending_frames(sk); |
1383 | } | 1382 | } |