about summary refs log tree commit diff stats
path: root/net/ipv4/ip_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r--  net/ipv4/ip_output.c | 123
1 file changed, 61 insertions(+), 62 deletions(-)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6d..534650cad3a8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph)
95/* dev_loopback_xmit for use with netfilter. */ 95/* dev_loopback_xmit for use with netfilter. */
96static int ip_dev_loopback_xmit(struct sk_buff *newskb) 96static int ip_dev_loopback_xmit(struct sk_buff *newskb)
97{ 97{
98 newskb->mac.raw = newskb->data; 98 skb_reset_mac_header(newskb);
99 __skb_pull(newskb, newskb->nh.raw - newskb->data); 99 __skb_pull(newskb, skb_network_offset(newskb));
100 newskb->pkt_type = PACKET_LOOPBACK; 100 newskb->pkt_type = PACKET_LOOPBACK;
101 newskb->ip_summed = CHECKSUM_UNNECESSARY; 101 newskb->ip_summed = CHECKSUM_UNNECESSARY;
102 BUG_TRAP(newskb->dst); 102 BUG_TRAP(newskb->dst);
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
125 struct iphdr *iph; 125 struct iphdr *iph;
126 126
127 /* Build the IP header. */ 127 /* Build the IP header. */
128 if (opt) 128 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
129 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen); 129 skb_reset_network_header(skb);
130 else 130 iph = ip_hdr(skb);
131 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
132
133 iph->version = 4; 131 iph->version = 4;
134 iph->ihl = 5; 132 iph->ihl = 5;
135 iph->tos = inet->tos; 133 iph->tos = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 iph->protocol = sk->sk_protocol; 141 iph->protocol = sk->sk_protocol;
144 iph->tot_len = htons(skb->len); 142 iph->tot_len = htons(skb->len);
145 ip_select_ident(iph, &rt->u.dst, sk); 143 ip_select_ident(iph, &rt->u.dst, sk);
146 skb->nh.iph = iph;
147 144
148 if (opt && opt->optlen) { 145 if (opt && opt->optlen) {
149 iph->ihl += opt->optlen>>2; 146 iph->ihl += opt->optlen>>2;
@@ -192,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
192 return -EINVAL; 189 return -EINVAL;
193} 190}
194 191
192static inline int ip_skb_dst_mtu(struct sk_buff *skb)
193{
194 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
195
196 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
197 skb->dst->dev->mtu : dst_mtu(skb->dst);
198}
199
195static inline int ip_finish_output(struct sk_buff *skb) 200static inline int ip_finish_output(struct sk_buff *skb)
196{ 201{
197#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) 202#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -201,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
201 return dst_output(skb); 206 return dst_output(skb);
202 } 207 }
203#endif 208#endif
204 if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) 209 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
205 return ip_fragment(skb, ip_finish_output2); 210 return ip_fragment(skb, ip_finish_output2);
206 else 211 else
207 return ip_finish_output2(skb); 212 return ip_finish_output2(skb);
@@ -248,7 +253,7 @@ int ip_mc_output(struct sk_buff *skb)
248 253
249 /* Multicasts with ttl 0 must not go beyond the host */ 254 /* Multicasts with ttl 0 must not go beyond the host */
250 255
251 if (skb->nh.iph->ttl == 0) { 256 if (ip_hdr(skb)->ttl == 0) {
252 kfree_skb(skb); 257 kfree_skb(skb);
253 return 0; 258 return 0;
254 } 259 }
@@ -333,7 +338,9 @@ packet_routed:
333 goto no_route; 338 goto no_route;
334 339
335 /* OK, we know where to send it, allocate and build IP header. */ 340 /* OK, we know where to send it, allocate and build IP header. */
336 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); 341 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
342 skb_reset_network_header(skb);
343 iph = ip_hdr(skb);
337 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 344 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
338 iph->tot_len = htons(skb->len); 345 iph->tot_len = htons(skb->len);
339 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) 346 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +351,6 @@ packet_routed:
344 iph->protocol = sk->sk_protocol; 351 iph->protocol = sk->sk_protocol;
345 iph->saddr = rt->rt_src; 352 iph->saddr = rt->rt_src;
346 iph->daddr = rt->rt_dst; 353 iph->daddr = rt->rt_dst;
347 skb->nh.iph = iph;
348 /* Transport layer set skb->h.foo itself. */ 354 /* Transport layer set skb->h.foo itself. */
349 355
350 if (opt && opt->optlen) { 356 if (opt && opt->optlen) {
@@ -386,21 +392,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
386#ifdef CONFIG_NET_SCHED 392#ifdef CONFIG_NET_SCHED
387 to->tc_index = from->tc_index; 393 to->tc_index = from->tc_index;
388#endif 394#endif
389#ifdef CONFIG_NETFILTER 395 nf_copy(to, from);
390 /* Connection association is same as pre-frag packet */
391 nf_conntrack_put(to->nfct);
392 to->nfct = from->nfct;
393 nf_conntrack_get(to->nfct);
394 to->nfctinfo = from->nfctinfo;
395#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 396#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
396 to->ipvs_property = from->ipvs_property; 397 to->ipvs_property = from->ipvs_property;
397#endif 398#endif
398#ifdef CONFIG_BRIDGE_NETFILTER
399 nf_bridge_put(to->nf_bridge);
400 to->nf_bridge = from->nf_bridge;
401 nf_bridge_get(to->nf_bridge);
402#endif
403#endif
404 skb_copy_secmark(to, from); 399 skb_copy_secmark(to, from);
405} 400}
406 401
@@ -430,12 +425,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
430 * Point into the IP datagram header. 425 * Point into the IP datagram header.
431 */ 426 */
432 427
433 iph = skb->nh.iph; 428 iph = ip_hdr(skb);
434 429
435 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 430 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
436 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 431 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
437 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 432 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
438 htonl(dst_mtu(&rt->u.dst))); 433 htonl(ip_skb_dst_mtu(skb)));
439 kfree_skb(skb); 434 kfree_skb(skb);
440 return -EMSGSIZE; 435 return -EMSGSIZE;
441 } 436 }
@@ -502,10 +497,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
502 * before previous one went down. */ 497 * before previous one went down. */
503 if (frag) { 498 if (frag) {
504 frag->ip_summed = CHECKSUM_NONE; 499 frag->ip_summed = CHECKSUM_NONE;
505 frag->h.raw = frag->data; 500 skb_reset_transport_header(frag);
506 frag->nh.raw = __skb_push(frag, hlen); 501 __skb_push(frag, hlen);
507 memcpy(frag->nh.raw, iph, hlen); 502 skb_reset_network_header(frag);
508 iph = frag->nh.iph; 503 memcpy(skb_network_header(frag), iph, hlen);
504 iph = ip_hdr(frag);
509 iph->tot_len = htons(frag->len); 505 iph->tot_len = htons(frag->len);
510 ip_copy_metadata(frag, skb); 506 ip_copy_metadata(frag, skb);
511 if (offset == 0) 507 if (offset == 0)
@@ -566,7 +562,7 @@ slow_path:
566 * Keep copying data until we run out. 562 * Keep copying data until we run out.
567 */ 563 */
568 564
569 while(left > 0) { 565 while (left > 0) {
570 len = left; 566 len = left;
571 /* IF: it doesn't fit, use 'mtu' - the data space left */ 567 /* IF: it doesn't fit, use 'mtu' - the data space left */
572 if (len > mtu) 568 if (len > mtu)
@@ -593,8 +589,8 @@ slow_path:
593 ip_copy_metadata(skb2, skb); 589 ip_copy_metadata(skb2, skb);
594 skb_reserve(skb2, ll_rs); 590 skb_reserve(skb2, ll_rs);
595 skb_put(skb2, len + hlen); 591 skb_put(skb2, len + hlen);
596 skb2->nh.raw = skb2->data; 592 skb_reset_network_header(skb2);
597 skb2->h.raw = skb2->data + hlen; 593 skb2->transport_header = skb2->network_header + hlen;
598 594
599 /* 595 /*
600 * Charge the memory for the fragment to any owner 596 * Charge the memory for the fragment to any owner
@@ -608,19 +604,19 @@ slow_path:
608 * Copy the packet header into the new buffer. 604 * Copy the packet header into the new buffer.
609 */ 605 */
610 606
611 memcpy(skb2->nh.raw, skb->data, hlen); 607 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
612 608
613 /* 609 /*
614 * Copy a block of the IP datagram. 610 * Copy a block of the IP datagram.
615 */ 611 */
616 if (skb_copy_bits(skb, ptr, skb2->h.raw, len)) 612 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
617 BUG(); 613 BUG();
618 left -= len; 614 left -= len;
619 615
620 /* 616 /*
621 * Fill in the new header fields. 617 * Fill in the new header fields.
622 */ 618 */
623 iph = skb2->nh.iph; 619 iph = ip_hdr(skb2);
624 iph->frag_off = htons((offset >> 3)); 620 iph->frag_off = htons((offset >> 3));
625 621
626 /* ANK: dirty, but effective trick. Upgrade options only if 622 /* ANK: dirty, but effective trick. Upgrade options only if
@@ -722,10 +718,10 @@ static inline int ip_ufo_append_data(struct sock *sk,
722 skb_put(skb,fragheaderlen + transhdrlen); 718 skb_put(skb,fragheaderlen + transhdrlen);
723 719
724 /* initialize network header pointer */ 720 /* initialize network header pointer */
725 skb->nh.raw = skb->data; 721 skb_reset_network_header(skb);
726 722
727 /* initialize protocol header pointer */ 723 /* initialize protocol header pointer */
728 skb->h.raw = skb->data + fragheaderlen; 724 skb->transport_header = skb->network_header + fragheaderlen;
729 725
730 skb->ip_summed = CHECKSUM_PARTIAL; 726 skb->ip_summed = CHECKSUM_PARTIAL;
731 skb->csum = 0; 727 skb->csum = 0;
@@ -799,7 +795,9 @@ int ip_append_data(struct sock *sk,
799 inet->cork.addr = ipc->addr; 795 inet->cork.addr = ipc->addr;
800 } 796 }
801 dst_hold(&rt->u.dst); 797 dst_hold(&rt->u.dst);
802 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); 798 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
799 rt->u.dst.dev->mtu :
800 dst_mtu(rt->u.dst.path);
803 inet->cork.rt = rt; 801 inet->cork.rt = rt;
804 inet->cork.length = 0; 802 inet->cork.length = 0;
805 sk->sk_sndmsg_page = NULL; 803 sk->sk_sndmsg_page = NULL;
@@ -929,9 +927,10 @@ alloc_new_skb:
929 * Find where to start putting bytes. 927 * Find where to start putting bytes.
930 */ 928 */
931 data = skb_put(skb, fraglen); 929 data = skb_put(skb, fraglen);
932 skb->nh.raw = data + exthdrlen; 930 skb_set_network_header(skb, exthdrlen);
931 skb->transport_header = (skb->network_header +
932 fragheaderlen);
933 data += fragheaderlen; 933 data += fragheaderlen;
934 skb->h.raw = data + exthdrlen;
935 934
936 if (fraggap) { 935 if (fraggap) {
937 skb->csum = skb_copy_and_csum_bits( 936 skb->csum = skb_copy_and_csum_bits(
@@ -1100,8 +1099,6 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1100 } 1099 }
1101 if (len <= 0) { 1100 if (len <= 0) {
1102 struct sk_buff *skb_prev; 1101 struct sk_buff *skb_prev;
1103 char *data;
1104 struct iphdr *iph;
1105 int alloclen; 1102 int alloclen;
1106 1103
1107 skb_prev = skb; 1104 skb_prev = skb;
@@ -1124,15 +1121,15 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1124 /* 1121 /*
1125 * Find where to start putting bytes. 1122 * Find where to start putting bytes.
1126 */ 1123 */
1127 data = skb_put(skb, fragheaderlen + fraggap); 1124 skb_put(skb, fragheaderlen + fraggap);
1128 skb->nh.iph = iph = (struct iphdr *)data; 1125 skb_reset_network_header(skb);
1129 data += fragheaderlen; 1126 skb->transport_header = (skb->network_header +
1130 skb->h.raw = data; 1127 fragheaderlen);
1131
1132 if (fraggap) { 1128 if (fraggap) {
1133 skb->csum = skb_copy_and_csum_bits( 1129 skb->csum = skb_copy_and_csum_bits(skb_prev,
1134 skb_prev, maxfraglen, 1130 maxfraglen,
1135 data, fraggap, 0); 1131 skb_transport_header(skb),
1132 fraggap, 0);
1136 skb_prev->csum = csum_sub(skb_prev->csum, 1133 skb_prev->csum = csum_sub(skb_prev->csum,
1137 skb->csum); 1134 skb->csum);
1138 pskb_trim_unique(skb_prev, maxfraglen); 1135 pskb_trim_unique(skb_prev, maxfraglen);
@@ -1198,10 +1195,10 @@ int ip_push_pending_frames(struct sock *sk)
1198 tail_skb = &(skb_shinfo(skb)->frag_list); 1195 tail_skb = &(skb_shinfo(skb)->frag_list);
1199 1196
1200 /* move skb->data to ip header from ext header */ 1197 /* move skb->data to ip header from ext header */
1201 if (skb->data < skb->nh.raw) 1198 if (skb->data < skb_network_header(skb))
1202 __skb_pull(skb, skb->nh.raw - skb->data); 1199 __skb_pull(skb, skb_network_offset(skb));
1203 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { 1200 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1204 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); 1201 __skb_pull(tmp_skb, skb_network_header_len(skb));
1205 *tail_skb = tmp_skb; 1202 *tail_skb = tmp_skb;
1206 tail_skb = &(tmp_skb->next); 1203 tail_skb = &(tmp_skb->next);
1207 skb->len += tmp_skb->len; 1204 skb->len += tmp_skb->len;
@@ -1216,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
1216 * to fragment the frame generated here. No matter, what transforms 1213 * to fragment the frame generated here. No matter, what transforms
1217 * how transforms change size of the packet, it will come out. 1214 * how transforms change size of the packet, it will come out.
1218 */ 1215 */
1219 if (inet->pmtudisc != IP_PMTUDISC_DO) 1216 if (inet->pmtudisc < IP_PMTUDISC_DO)
1220 skb->local_df = 1; 1217 skb->local_df = 1;
1221 1218
1222 /* DF bit is set when we want to see DF on outgoing frames. 1219 /* DF bit is set when we want to see DF on outgoing frames.
1223 * If local_df is set too, we still allow to fragment this frame 1220 * If local_df is set too, we still allow to fragment this frame
1224 * locally. */ 1221 * locally. */
1225 if (inet->pmtudisc == IP_PMTUDISC_DO || 1222 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1226 (skb->len <= dst_mtu(&rt->u.dst) && 1223 (skb->len <= dst_mtu(&rt->u.dst) &&
1227 ip_dont_fragment(sk, &rt->u.dst))) 1224 ip_dont_fragment(sk, &rt->u.dst)))
1228 df = htons(IP_DF); 1225 df = htons(IP_DF);
@@ -1352,11 +1349,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1352 struct flowi fl = { .nl_u = { .ip4_u = 1349 struct flowi fl = { .nl_u = { .ip4_u =
1353 { .daddr = daddr, 1350 { .daddr = daddr,
1354 .saddr = rt->rt_spec_dst, 1351 .saddr = rt->rt_spec_dst,
1355 .tos = RT_TOS(skb->nh.iph->tos) } }, 1352 .tos = RT_TOS(ip_hdr(skb)->tos) } },
1356 /* Not quite clean, but right. */ 1353 /* Not quite clean, but right. */
1357 .uli_u = { .ports = 1354 .uli_u = { .ports =
1358 { .sport = skb->h.th->dest, 1355 { .sport = tcp_hdr(skb)->dest,
1359 .dport = skb->h.th->source } }, 1356 .dport = tcp_hdr(skb)->source } },
1360 .proto = sk->sk_protocol }; 1357 .proto = sk->sk_protocol };
1361 security_skb_classify_flow(skb, &fl); 1358 security_skb_classify_flow(skb, &fl);
1362 if (ip_route_output_key(&rt, &fl)) 1359 if (ip_route_output_key(&rt, &fl))
@@ -1370,14 +1367,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1370 with locally disabled BH and that sk cannot be already spinlocked. 1367 with locally disabled BH and that sk cannot be already spinlocked.
1371 */ 1368 */
1372 bh_lock_sock(sk); 1369 bh_lock_sock(sk);
1373 inet->tos = skb->nh.iph->tos; 1370 inet->tos = ip_hdr(skb)->tos;
1374 sk->sk_priority = skb->priority; 1371 sk->sk_priority = skb->priority;
1375 sk->sk_protocol = skb->nh.iph->protocol; 1372 sk->sk_protocol = ip_hdr(skb)->protocol;
1376 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, 1373 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1377 &ipc, rt, MSG_DONTWAIT); 1374 &ipc, rt, MSG_DONTWAIT);
1378 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 1375 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1379 if (arg->csumoffset >= 0) 1376 if (arg->csumoffset >= 0)
1380 *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum)); 1377 *((__sum16 *)skb_transport_header(skb) +
1378 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1379 arg->csum));
1381 skb->ip_summed = CHECKSUM_NONE; 1380 skb->ip_summed = CHECKSUM_NONE;
1382 ip_push_pending_frames(sk); 1381 ip_push_pending_frames(sk);
1383 } 1382 }