diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/ipv4/ip_output.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 474 |
1 files changed, 274 insertions, 200 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7649d7750075..84f26e8e6c60 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -82,6 +82,7 @@ | |||
82 | #include <linux/tcp.h> | 82 | #include <linux/tcp.h> |
83 | 83 | ||
84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; | 84 | int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; |
85 | EXPORT_SYMBOL(sysctl_ip_default_ttl); | ||
85 | 86 | ||
86 | /* Generate a checksum for an outgoing IP datagram. */ | 87 | /* Generate a checksum for an outgoing IP datagram. */ |
87 | __inline__ void ip_send_check(struct iphdr *iph) | 88 | __inline__ void ip_send_check(struct iphdr *iph) |
@@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | |||
130 | int ttl = inet->uc_ttl; | 131 | int ttl = inet->uc_ttl; |
131 | 132 | ||
132 | if (ttl < 0) | 133 | if (ttl < 0) |
133 | ttl = dst_metric(dst, RTAX_HOPLIMIT); | 134 | ttl = ip4_dst_hoplimit(dst); |
134 | return ttl; | 135 | return ttl; |
135 | } | 136 | } |
136 | 137 | ||
@@ -139,14 +140,14 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) | |||
139 | * | 140 | * |
140 | */ | 141 | */ |
141 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | 142 | int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, |
142 | __be32 saddr, __be32 daddr, struct ip_options *opt) | 143 | __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) |
143 | { | 144 | { |
144 | struct inet_sock *inet = inet_sk(sk); | 145 | struct inet_sock *inet = inet_sk(sk); |
145 | struct rtable *rt = skb_rtable(skb); | 146 | struct rtable *rt = skb_rtable(skb); |
146 | struct iphdr *iph; | 147 | struct iphdr *iph; |
147 | 148 | ||
148 | /* Build the IP header. */ | 149 | /* Build the IP header. */ |
149 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 150 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); |
150 | skb_reset_network_header(skb); | 151 | skb_reset_network_header(skb); |
151 | iph = ip_hdr(skb); | 152 | iph = ip_hdr(skb); |
152 | iph->version = 4; | 153 | iph->version = 4; |
@@ -157,14 +158,14 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
157 | else | 158 | else |
158 | iph->frag_off = 0; | 159 | iph->frag_off = 0; |
159 | iph->ttl = ip_select_ttl(inet, &rt->dst); | 160 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
160 | iph->daddr = rt->rt_dst; | 161 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); |
161 | iph->saddr = rt->rt_src; | 162 | iph->saddr = saddr; |
162 | iph->protocol = sk->sk_protocol; | 163 | iph->protocol = sk->sk_protocol; |
163 | ip_select_ident(iph, &rt->dst, sk); | 164 | ip_select_ident(iph, &rt->dst, sk); |
164 | 165 | ||
165 | if (opt && opt->optlen) { | 166 | if (opt && opt->opt.optlen) { |
166 | iph->ihl += opt->optlen>>2; | 167 | iph->ihl += opt->opt.optlen>>2; |
167 | ip_options_build(skb, opt, daddr, rt, 0); | 168 | ip_options_build(skb, &opt->opt, daddr, rt, 0); |
168 | } | 169 | } |
169 | 170 | ||
170 | skb->priority = sk->sk_priority; | 171 | skb->priority = sk->sk_priority; |
@@ -311,11 +312,12 @@ int ip_output(struct sk_buff *skb) | |||
311 | !(IPCB(skb)->flags & IPSKB_REROUTED)); | 312 | !(IPCB(skb)->flags & IPSKB_REROUTED)); |
312 | } | 313 | } |
313 | 314 | ||
314 | int ip_queue_xmit(struct sk_buff *skb) | 315 | int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) |
315 | { | 316 | { |
316 | struct sock *sk = skb->sk; | 317 | struct sock *sk = skb->sk; |
317 | struct inet_sock *inet = inet_sk(sk); | 318 | struct inet_sock *inet = inet_sk(sk); |
318 | struct ip_options *opt = inet->opt; | 319 | struct ip_options_rcu *inet_opt; |
320 | struct flowi4 *fl4; | ||
319 | struct rtable *rt; | 321 | struct rtable *rt; |
320 | struct iphdr *iph; | 322 | struct iphdr *iph; |
321 | int res; | 323 | int res; |
@@ -324,6 +326,8 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
324 | * f.e. by something like SCTP. | 326 | * f.e. by something like SCTP. |
325 | */ | 327 | */ |
326 | rcu_read_lock(); | 328 | rcu_read_lock(); |
329 | inet_opt = rcu_dereference(inet->inet_opt); | ||
330 | fl4 = &fl->u.ip4; | ||
327 | rt = skb_rtable(skb); | 331 | rt = skb_rtable(skb); |
328 | if (rt != NULL) | 332 | if (rt != NULL) |
329 | goto packet_routed; | 333 | goto packet_routed; |
@@ -335,40 +339,32 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
335 | 339 | ||
336 | /* Use correct destination address if we have options. */ | 340 | /* Use correct destination address if we have options. */ |
337 | daddr = inet->inet_daddr; | 341 | daddr = inet->inet_daddr; |
338 | if(opt && opt->srr) | 342 | if (inet_opt && inet_opt->opt.srr) |
339 | daddr = opt->faddr; | 343 | daddr = inet_opt->opt.faddr; |
340 | 344 | ||
341 | { | 345 | /* If this fails, retransmit mechanism of transport layer will |
342 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 346 | * keep trying until route appears or the connection times |
343 | .mark = sk->sk_mark, | 347 | * itself out. |
344 | .nl_u = { .ip4_u = | 348 | */ |
345 | { .daddr = daddr, | 349 | rt = ip_route_output_ports(sock_net(sk), fl4, sk, |
346 | .saddr = inet->inet_saddr, | 350 | daddr, inet->inet_saddr, |
347 | .tos = RT_CONN_FLAGS(sk) } }, | 351 | inet->inet_dport, |
348 | .proto = sk->sk_protocol, | 352 | inet->inet_sport, |
349 | .flags = inet_sk_flowi_flags(sk), | 353 | sk->sk_protocol, |
350 | .uli_u = { .ports = | 354 | RT_CONN_FLAGS(sk), |
351 | { .sport = inet->inet_sport, | 355 | sk->sk_bound_dev_if); |
352 | .dport = inet->inet_dport } } }; | 356 | if (IS_ERR(rt)) |
353 | 357 | goto no_route; | |
354 | /* If this fails, retransmit mechanism of transport layer will | ||
355 | * keep trying until route appears or the connection times | ||
356 | * itself out. | ||
357 | */ | ||
358 | security_sk_classify_flow(sk, &fl); | ||
359 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | ||
360 | goto no_route; | ||
361 | } | ||
362 | sk_setup_caps(sk, &rt->dst); | 358 | sk_setup_caps(sk, &rt->dst); |
363 | } | 359 | } |
364 | skb_dst_set_noref(skb, &rt->dst); | 360 | skb_dst_set_noref(skb, &rt->dst); |
365 | 361 | ||
366 | packet_routed: | 362 | packet_routed: |
367 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 363 | if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) |
368 | goto no_route; | 364 | goto no_route; |
369 | 365 | ||
370 | /* OK, we know where to send it, allocate and build IP header. */ | 366 | /* OK, we know where to send it, allocate and build IP header. */ |
371 | skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0)); | 367 | skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0)); |
372 | skb_reset_network_header(skb); | 368 | skb_reset_network_header(skb); |
373 | iph = ip_hdr(skb); | 369 | iph = ip_hdr(skb); |
374 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 370 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
@@ -378,13 +374,13 @@ packet_routed: | |||
378 | iph->frag_off = 0; | 374 | iph->frag_off = 0; |
379 | iph->ttl = ip_select_ttl(inet, &rt->dst); | 375 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
380 | iph->protocol = sk->sk_protocol; | 376 | iph->protocol = sk->sk_protocol; |
381 | iph->saddr = rt->rt_src; | 377 | iph->saddr = fl4->saddr; |
382 | iph->daddr = rt->rt_dst; | 378 | iph->daddr = fl4->daddr; |
383 | /* Transport layer set skb->h.foo itself. */ | 379 | /* Transport layer set skb->h.foo itself. */ |
384 | 380 | ||
385 | if (opt && opt->optlen) { | 381 | if (inet_opt && inet_opt->opt.optlen) { |
386 | iph->ihl += opt->optlen >> 2; | 382 | iph->ihl += inet_opt->opt.optlen >> 2; |
387 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); | 383 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); |
388 | } | 384 | } |
389 | 385 | ||
390 | ip_select_ident_more(iph, &rt->dst, sk, | 386 | ip_select_ident_more(iph, &rt->dst, sk, |
@@ -487,7 +483,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
487 | * LATER: this step can be merged to real generation of fragments, | 483 | * LATER: this step can be merged to real generation of fragments, |
488 | * we can switch to copy when see the first bad fragment. | 484 | * we can switch to copy when see the first bad fragment. |
489 | */ | 485 | */ |
490 | if (skb_has_frags(skb)) { | 486 | if (skb_has_frag_list(skb)) { |
491 | struct sk_buff *frag, *frag2; | 487 | struct sk_buff *frag, *frag2; |
492 | int first_len = skb_pagelen(skb); | 488 | int first_len = skb_pagelen(skb); |
493 | 489 | ||
@@ -610,7 +606,7 @@ slow_path: | |||
610 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 606 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
611 | if (len > mtu) | 607 | if (len > mtu) |
612 | len = mtu; | 608 | len = mtu; |
613 | /* IF: we are not sending upto and including the packet end | 609 | /* IF: we are not sending up to and including the packet end |
614 | then align the next start on an eight byte boundary */ | 610 | then align the next start on an eight byte boundary */ |
615 | if (len < left) { | 611 | if (len < left) { |
616 | len &= ~7; | 612 | len &= ~7; |
@@ -734,6 +730,7 @@ csum_page(struct page *page, int offset, int copy) | |||
734 | } | 730 | } |
735 | 731 | ||
736 | static inline int ip_ufo_append_data(struct sock *sk, | 732 | static inline int ip_ufo_append_data(struct sock *sk, |
733 | struct sk_buff_head *queue, | ||
737 | int getfrag(void *from, char *to, int offset, int len, | 734 | int getfrag(void *from, char *to, int offset, int len, |
738 | int odd, struct sk_buff *skb), | 735 | int odd, struct sk_buff *skb), |
739 | void *from, int length, int hh_len, int fragheaderlen, | 736 | void *from, int length, int hh_len, int fragheaderlen, |
@@ -746,7 +743,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
746 | * device, so create one single skb packet containing complete | 743 | * device, so create one single skb packet containing complete |
747 | * udp datagram | 744 | * udp datagram |
748 | */ | 745 | */ |
749 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | 746 | if ((skb = skb_peek_tail(queue)) == NULL) { |
750 | skb = sock_alloc_send_skb(sk, | 747 | skb = sock_alloc_send_skb(sk, |
751 | hh_len + fragheaderlen + transhdrlen + 20, | 748 | hh_len + fragheaderlen + transhdrlen + 20, |
752 | (flags & MSG_DONTWAIT), &err); | 749 | (flags & MSG_DONTWAIT), &err); |
@@ -768,40 +765,30 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
768 | 765 | ||
769 | skb->ip_summed = CHECKSUM_PARTIAL; | 766 | skb->ip_summed = CHECKSUM_PARTIAL; |
770 | skb->csum = 0; | 767 | skb->csum = 0; |
771 | sk->sk_sndmsg_off = 0; | ||
772 | 768 | ||
773 | /* specify the length of each IP datagram fragment */ | 769 | /* specify the length of each IP datagram fragment */ |
774 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 770 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
775 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 771 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
776 | __skb_queue_tail(&sk->sk_write_queue, skb); | 772 | __skb_queue_tail(queue, skb); |
777 | } | 773 | } |
778 | 774 | ||
779 | return skb_append_datato_frags(sk, skb, getfrag, from, | 775 | return skb_append_datato_frags(sk, skb, getfrag, from, |
780 | (length - transhdrlen)); | 776 | (length - transhdrlen)); |
781 | } | 777 | } |
782 | 778 | ||
783 | /* | 779 | static int __ip_append_data(struct sock *sk, |
784 | * ip_append_data() and ip_append_page() can make one large IP datagram | 780 | struct flowi4 *fl4, |
785 | * from many pieces of data. Each pieces will be holded on the socket | 781 | struct sk_buff_head *queue, |
786 | * until ip_push_pending_frames() is called. Each piece can be a page | 782 | struct inet_cork *cork, |
787 | * or non-page data. | 783 | int getfrag(void *from, char *to, int offset, |
788 | * | 784 | int len, int odd, struct sk_buff *skb), |
789 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | 785 | void *from, int length, int transhdrlen, |
790 | * this interface potentially. | 786 | unsigned int flags) |
791 | * | ||
792 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
793 | */ | ||
794 | int ip_append_data(struct sock *sk, | ||
795 | int getfrag(void *from, char *to, int offset, int len, | ||
796 | int odd, struct sk_buff *skb), | ||
797 | void *from, int length, int transhdrlen, | ||
798 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
799 | unsigned int flags) | ||
800 | { | 787 | { |
801 | struct inet_sock *inet = inet_sk(sk); | 788 | struct inet_sock *inet = inet_sk(sk); |
802 | struct sk_buff *skb; | 789 | struct sk_buff *skb; |
803 | 790 | ||
804 | struct ip_options *opt = NULL; | 791 | struct ip_options *opt = cork->opt; |
805 | int hh_len; | 792 | int hh_len; |
806 | int exthdrlen; | 793 | int exthdrlen; |
807 | int mtu; | 794 | int mtu; |
@@ -810,60 +797,20 @@ int ip_append_data(struct sock *sk, | |||
810 | int offset = 0; | 797 | int offset = 0; |
811 | unsigned int maxfraglen, fragheaderlen; | 798 | unsigned int maxfraglen, fragheaderlen; |
812 | int csummode = CHECKSUM_NONE; | 799 | int csummode = CHECKSUM_NONE; |
813 | struct rtable *rt; | 800 | struct rtable *rt = (struct rtable *)cork->dst; |
814 | 801 | ||
815 | if (flags&MSG_PROBE) | 802 | skb = skb_peek_tail(queue); |
816 | return 0; | ||
817 | 803 | ||
818 | if (skb_queue_empty(&sk->sk_write_queue)) { | 804 | exthdrlen = !skb ? rt->dst.header_len : 0; |
819 | /* | 805 | mtu = cork->fragsize; |
820 | * setup for corking. | ||
821 | */ | ||
822 | opt = ipc->opt; | ||
823 | if (opt) { | ||
824 | if (inet->cork.opt == NULL) { | ||
825 | inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); | ||
826 | if (unlikely(inet->cork.opt == NULL)) | ||
827 | return -ENOBUFS; | ||
828 | } | ||
829 | memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); | ||
830 | inet->cork.flags |= IPCORK_OPT; | ||
831 | inet->cork.addr = ipc->addr; | ||
832 | } | ||
833 | rt = *rtp; | ||
834 | if (unlikely(!rt)) | ||
835 | return -EFAULT; | ||
836 | /* | ||
837 | * We steal reference to this route, caller should not release it | ||
838 | */ | ||
839 | *rtp = NULL; | ||
840 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
841 | rt->dst.dev->mtu : | ||
842 | dst_mtu(rt->dst.path); | ||
843 | inet->cork.dst = &rt->dst; | ||
844 | inet->cork.length = 0; | ||
845 | sk->sk_sndmsg_page = NULL; | ||
846 | sk->sk_sndmsg_off = 0; | ||
847 | if ((exthdrlen = rt->dst.header_len) != 0) { | ||
848 | length += exthdrlen; | ||
849 | transhdrlen += exthdrlen; | ||
850 | } | ||
851 | } else { | ||
852 | rt = (struct rtable *)inet->cork.dst; | ||
853 | if (inet->cork.flags & IPCORK_OPT) | ||
854 | opt = inet->cork.opt; | ||
855 | 806 | ||
856 | transhdrlen = 0; | ||
857 | exthdrlen = 0; | ||
858 | mtu = inet->cork.fragsize; | ||
859 | } | ||
860 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 807 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
861 | 808 | ||
862 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 809 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
863 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 810 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
864 | 811 | ||
865 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { | 812 | if (cork->length + length > 0xFFFF - fragheaderlen) { |
866 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 813 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, |
867 | mtu-exthdrlen); | 814 | mtu-exthdrlen); |
868 | return -EMSGSIZE; | 815 | return -EMSGSIZE; |
869 | } | 816 | } |
@@ -878,15 +825,13 @@ int ip_append_data(struct sock *sk, | |||
878 | !exthdrlen) | 825 | !exthdrlen) |
879 | csummode = CHECKSUM_PARTIAL; | 826 | csummode = CHECKSUM_PARTIAL; |
880 | 827 | ||
881 | skb = skb_peek_tail(&sk->sk_write_queue); | 828 | cork->length += length; |
882 | |||
883 | inet->cork.length += length; | ||
884 | if (((length > mtu) || (skb && skb_is_gso(skb))) && | 829 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
885 | (sk->sk_protocol == IPPROTO_UDP) && | 830 | (sk->sk_protocol == IPPROTO_UDP) && |
886 | (rt->dst.dev->features & NETIF_F_UFO)) { | 831 | (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { |
887 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 832 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, |
888 | fragheaderlen, transhdrlen, mtu, | 833 | hh_len, fragheaderlen, transhdrlen, |
889 | flags); | 834 | mtu, flags); |
890 | if (err) | 835 | if (err) |
891 | goto error; | 836 | goto error; |
892 | return 0; | 837 | return 0; |
@@ -934,7 +879,9 @@ alloc_new_skb: | |||
934 | !(rt->dst.dev->features&NETIF_F_SG)) | 879 | !(rt->dst.dev->features&NETIF_F_SG)) |
935 | alloclen = mtu; | 880 | alloclen = mtu; |
936 | else | 881 | else |
937 | alloclen = datalen + fragheaderlen; | 882 | alloclen = fraglen; |
883 | |||
884 | alloclen += exthdrlen; | ||
938 | 885 | ||
939 | /* The last fragment gets additional space at tail. | 886 | /* The last fragment gets additional space at tail. |
940 | * Note, with MSG_MORE we overallocate on fragments, | 887 | * Note, with MSG_MORE we overallocate on fragments, |
@@ -960,7 +907,7 @@ alloc_new_skb: | |||
960 | else | 907 | else |
961 | /* only the initial fragment is | 908 | /* only the initial fragment is |
962 | time stamped */ | 909 | time stamped */ |
963 | ipc->shtx.flags = 0; | 910 | cork->tx_flags = 0; |
964 | } | 911 | } |
965 | if (skb == NULL) | 912 | if (skb == NULL) |
966 | goto error; | 913 | goto error; |
@@ -971,16 +918,16 @@ alloc_new_skb: | |||
971 | skb->ip_summed = csummode; | 918 | skb->ip_summed = csummode; |
972 | skb->csum = 0; | 919 | skb->csum = 0; |
973 | skb_reserve(skb, hh_len); | 920 | skb_reserve(skb, hh_len); |
974 | *skb_tx(skb) = ipc->shtx; | 921 | skb_shinfo(skb)->tx_flags = cork->tx_flags; |
975 | 922 | ||
976 | /* | 923 | /* |
977 | * Find where to start putting bytes. | 924 | * Find where to start putting bytes. |
978 | */ | 925 | */ |
979 | data = skb_put(skb, fraglen); | 926 | data = skb_put(skb, fraglen + exthdrlen); |
980 | skb_set_network_header(skb, exthdrlen); | 927 | skb_set_network_header(skb, exthdrlen); |
981 | skb->transport_header = (skb->network_header + | 928 | skb->transport_header = (skb->network_header + |
982 | fragheaderlen); | 929 | fragheaderlen); |
983 | data += fragheaderlen; | 930 | data += fragheaderlen + exthdrlen; |
984 | 931 | ||
985 | if (fraggap) { | 932 | if (fraggap) { |
986 | skb->csum = skb_copy_and_csum_bits( | 933 | skb->csum = skb_copy_and_csum_bits( |
@@ -1008,7 +955,7 @@ alloc_new_skb: | |||
1008 | /* | 955 | /* |
1009 | * Put the packet on the pending queue. | 956 | * Put the packet on the pending queue. |
1010 | */ | 957 | */ |
1011 | __skb_queue_tail(&sk->sk_write_queue, skb); | 958 | __skb_queue_tail(queue, skb); |
1012 | continue; | 959 | continue; |
1013 | } | 960 | } |
1014 | 961 | ||
@@ -1028,8 +975,8 @@ alloc_new_skb: | |||
1028 | } else { | 975 | } else { |
1029 | int i = skb_shinfo(skb)->nr_frags; | 976 | int i = skb_shinfo(skb)->nr_frags; |
1030 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | 977 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; |
1031 | struct page *page = sk->sk_sndmsg_page; | 978 | struct page *page = cork->page; |
1032 | int off = sk->sk_sndmsg_off; | 979 | int off = cork->off; |
1033 | unsigned int left; | 980 | unsigned int left; |
1034 | 981 | ||
1035 | if (page && (left = PAGE_SIZE - off) > 0) { | 982 | if (page && (left = PAGE_SIZE - off) > 0) { |
@@ -1041,7 +988,7 @@ alloc_new_skb: | |||
1041 | goto error; | 988 | goto error; |
1042 | } | 989 | } |
1043 | get_page(page); | 990 | get_page(page); |
1044 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); | 991 | skb_fill_page_desc(skb, i, page, off, 0); |
1045 | frag = &skb_shinfo(skb)->frags[i]; | 992 | frag = &skb_shinfo(skb)->frags[i]; |
1046 | } | 993 | } |
1047 | } else if (i < MAX_SKB_FRAGS) { | 994 | } else if (i < MAX_SKB_FRAGS) { |
@@ -1052,8 +999,8 @@ alloc_new_skb: | |||
1052 | err = -ENOMEM; | 999 | err = -ENOMEM; |
1053 | goto error; | 1000 | goto error; |
1054 | } | 1001 | } |
1055 | sk->sk_sndmsg_page = page; | 1002 | cork->page = page; |
1056 | sk->sk_sndmsg_off = 0; | 1003 | cork->off = 0; |
1057 | 1004 | ||
1058 | skb_fill_page_desc(skb, i, page, 0, 0); | 1005 | skb_fill_page_desc(skb, i, page, 0, 0); |
1059 | frag = &skb_shinfo(skb)->frags[i]; | 1006 | frag = &skb_shinfo(skb)->frags[i]; |
@@ -1065,7 +1012,7 @@ alloc_new_skb: | |||
1065 | err = -EFAULT; | 1012 | err = -EFAULT; |
1066 | goto error; | 1013 | goto error; |
1067 | } | 1014 | } |
1068 | sk->sk_sndmsg_off += copy; | 1015 | cork->off += copy; |
1069 | frag->size += copy; | 1016 | frag->size += copy; |
1070 | skb->len += copy; | 1017 | skb->len += copy; |
1071 | skb->data_len += copy; | 1018 | skb->data_len += copy; |
@@ -1079,18 +1026,95 @@ alloc_new_skb: | |||
1079 | return 0; | 1026 | return 0; |
1080 | 1027 | ||
1081 | error: | 1028 | error: |
1082 | inet->cork.length -= length; | 1029 | cork->length -= length; |
1083 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1030 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1084 | return err; | 1031 | return err; |
1085 | } | 1032 | } |
1086 | 1033 | ||
1087 | ssize_t ip_append_page(struct sock *sk, struct page *page, | 1034 | static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, |
1035 | struct ipcm_cookie *ipc, struct rtable **rtp) | ||
1036 | { | ||
1037 | struct inet_sock *inet = inet_sk(sk); | ||
1038 | struct ip_options_rcu *opt; | ||
1039 | struct rtable *rt; | ||
1040 | |||
1041 | /* | ||
1042 | * setup for corking. | ||
1043 | */ | ||
1044 | opt = ipc->opt; | ||
1045 | if (opt) { | ||
1046 | if (cork->opt == NULL) { | ||
1047 | cork->opt = kmalloc(sizeof(struct ip_options) + 40, | ||
1048 | sk->sk_allocation); | ||
1049 | if (unlikely(cork->opt == NULL)) | ||
1050 | return -ENOBUFS; | ||
1051 | } | ||
1052 | memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen); | ||
1053 | cork->flags |= IPCORK_OPT; | ||
1054 | cork->addr = ipc->addr; | ||
1055 | } | ||
1056 | rt = *rtp; | ||
1057 | if (unlikely(!rt)) | ||
1058 | return -EFAULT; | ||
1059 | /* | ||
1060 | * We steal reference to this route, caller should not release it | ||
1061 | */ | ||
1062 | *rtp = NULL; | ||
1063 | cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
1064 | rt->dst.dev->mtu : dst_mtu(&rt->dst); | ||
1065 | cork->dst = &rt->dst; | ||
1066 | cork->length = 0; | ||
1067 | cork->tx_flags = ipc->tx_flags; | ||
1068 | cork->page = NULL; | ||
1069 | cork->off = 0; | ||
1070 | |||
1071 | return 0; | ||
1072 | } | ||
1073 | |||
1074 | /* | ||
1075 | * ip_append_data() and ip_append_page() can make one large IP datagram | ||
1076 | * from many pieces of data. Each pieces will be holded on the socket | ||
1077 | * until ip_push_pending_frames() is called. Each piece can be a page | ||
1078 | * or non-page data. | ||
1079 | * | ||
1080 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | ||
1081 | * this interface potentially. | ||
1082 | * | ||
1083 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
1084 | */ | ||
1085 | int ip_append_data(struct sock *sk, struct flowi4 *fl4, | ||
1086 | int getfrag(void *from, char *to, int offset, int len, | ||
1087 | int odd, struct sk_buff *skb), | ||
1088 | void *from, int length, int transhdrlen, | ||
1089 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1090 | unsigned int flags) | ||
1091 | { | ||
1092 | struct inet_sock *inet = inet_sk(sk); | ||
1093 | int err; | ||
1094 | |||
1095 | if (flags&MSG_PROBE) | ||
1096 | return 0; | ||
1097 | |||
1098 | if (skb_queue_empty(&sk->sk_write_queue)) { | ||
1099 | err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp); | ||
1100 | if (err) | ||
1101 | return err; | ||
1102 | } else { | ||
1103 | transhdrlen = 0; | ||
1104 | } | ||
1105 | |||
1106 | return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, | ||
1107 | from, length, transhdrlen, flags); | ||
1108 | } | ||
1109 | |||
1110 | ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, | ||
1088 | int offset, size_t size, int flags) | 1111 | int offset, size_t size, int flags) |
1089 | { | 1112 | { |
1090 | struct inet_sock *inet = inet_sk(sk); | 1113 | struct inet_sock *inet = inet_sk(sk); |
1091 | struct sk_buff *skb; | 1114 | struct sk_buff *skb; |
1092 | struct rtable *rt; | 1115 | struct rtable *rt; |
1093 | struct ip_options *opt = NULL; | 1116 | struct ip_options *opt = NULL; |
1117 | struct inet_cork *cork; | ||
1094 | int hh_len; | 1118 | int hh_len; |
1095 | int mtu; | 1119 | int mtu; |
1096 | int len; | 1120 | int len; |
@@ -1106,28 +1130,29 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1106 | if (skb_queue_empty(&sk->sk_write_queue)) | 1130 | if (skb_queue_empty(&sk->sk_write_queue)) |
1107 | return -EINVAL; | 1131 | return -EINVAL; |
1108 | 1132 | ||
1109 | rt = (struct rtable *)inet->cork.dst; | 1133 | cork = &inet->cork.base; |
1110 | if (inet->cork.flags & IPCORK_OPT) | 1134 | rt = (struct rtable *)cork->dst; |
1111 | opt = inet->cork.opt; | 1135 | if (cork->flags & IPCORK_OPT) |
1136 | opt = cork->opt; | ||
1112 | 1137 | ||
1113 | if (!(rt->dst.dev->features&NETIF_F_SG)) | 1138 | if (!(rt->dst.dev->features&NETIF_F_SG)) |
1114 | return -EOPNOTSUPP; | 1139 | return -EOPNOTSUPP; |
1115 | 1140 | ||
1116 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 1141 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
1117 | mtu = inet->cork.fragsize; | 1142 | mtu = cork->fragsize; |
1118 | 1143 | ||
1119 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 1144 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
1120 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 1145 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
1121 | 1146 | ||
1122 | if (inet->cork.length + size > 0xFFFF - fragheaderlen) { | 1147 | if (cork->length + size > 0xFFFF - fragheaderlen) { |
1123 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); | 1148 | ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, mtu); |
1124 | return -EMSGSIZE; | 1149 | return -EMSGSIZE; |
1125 | } | 1150 | } |
1126 | 1151 | ||
1127 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) | 1152 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) |
1128 | return -EINVAL; | 1153 | return -EINVAL; |
1129 | 1154 | ||
1130 | inet->cork.length += size; | 1155 | cork->length += size; |
1131 | if ((size + skb->len > mtu) && | 1156 | if ((size + skb->len > mtu) && |
1132 | (sk->sk_protocol == IPPROTO_UDP) && | 1157 | (sk->sk_protocol == IPPROTO_UDP) && |
1133 | (rt->dst.dev->features & NETIF_F_UFO)) { | 1158 | (rt->dst.dev->features & NETIF_F_UFO)) { |
@@ -1222,45 +1247,47 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1222 | return 0; | 1247 | return 0; |
1223 | 1248 | ||
1224 | error: | 1249 | error: |
1225 | inet->cork.length -= size; | 1250 | cork->length -= size; |
1226 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1251 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1227 | return err; | 1252 | return err; |
1228 | } | 1253 | } |
1229 | 1254 | ||
1230 | static void ip_cork_release(struct inet_sock *inet) | 1255 | static void ip_cork_release(struct inet_cork *cork) |
1231 | { | 1256 | { |
1232 | inet->cork.flags &= ~IPCORK_OPT; | 1257 | cork->flags &= ~IPCORK_OPT; |
1233 | kfree(inet->cork.opt); | 1258 | kfree(cork->opt); |
1234 | inet->cork.opt = NULL; | 1259 | cork->opt = NULL; |
1235 | dst_release(inet->cork.dst); | 1260 | dst_release(cork->dst); |
1236 | inet->cork.dst = NULL; | 1261 | cork->dst = NULL; |
1237 | } | 1262 | } |
1238 | 1263 | ||
1239 | /* | 1264 | /* |
1240 | * Combined all pending IP fragments on the socket as one IP datagram | 1265 | * Combined all pending IP fragments on the socket as one IP datagram |
1241 | * and push them out. | 1266 | * and push them out. |
1242 | */ | 1267 | */ |
1243 | int ip_push_pending_frames(struct sock *sk) | 1268 | struct sk_buff *__ip_make_skb(struct sock *sk, |
1269 | struct flowi4 *fl4, | ||
1270 | struct sk_buff_head *queue, | ||
1271 | struct inet_cork *cork) | ||
1244 | { | 1272 | { |
1245 | struct sk_buff *skb, *tmp_skb; | 1273 | struct sk_buff *skb, *tmp_skb; |
1246 | struct sk_buff **tail_skb; | 1274 | struct sk_buff **tail_skb; |
1247 | struct inet_sock *inet = inet_sk(sk); | 1275 | struct inet_sock *inet = inet_sk(sk); |
1248 | struct net *net = sock_net(sk); | 1276 | struct net *net = sock_net(sk); |
1249 | struct ip_options *opt = NULL; | 1277 | struct ip_options *opt = NULL; |
1250 | struct rtable *rt = (struct rtable *)inet->cork.dst; | 1278 | struct rtable *rt = (struct rtable *)cork->dst; |
1251 | struct iphdr *iph; | 1279 | struct iphdr *iph; |
1252 | __be16 df = 0; | 1280 | __be16 df = 0; |
1253 | __u8 ttl; | 1281 | __u8 ttl; |
1254 | int err = 0; | ||
1255 | 1282 | ||
1256 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) | 1283 | if ((skb = __skb_dequeue(queue)) == NULL) |
1257 | goto out; | 1284 | goto out; |
1258 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1285 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1259 | 1286 | ||
1260 | /* move skb->data to ip header from ext header */ | 1287 | /* move skb->data to ip header from ext header */ |
1261 | if (skb->data < skb_network_header(skb)) | 1288 | if (skb->data < skb_network_header(skb)) |
1262 | __skb_pull(skb, skb_network_offset(skb)); | 1289 | __skb_pull(skb, skb_network_offset(skb)); |
1263 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1290 | while ((tmp_skb = __skb_dequeue(queue)) != NULL) { |
1264 | __skb_pull(tmp_skb, skb_network_header_len(skb)); | 1291 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1265 | *tail_skb = tmp_skb; | 1292 | *tail_skb = tmp_skb; |
1266 | tail_skb = &(tmp_skb->next); | 1293 | tail_skb = &(tmp_skb->next); |
@@ -1286,8 +1313,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
1286 | ip_dont_fragment(sk, &rt->dst))) | 1313 | ip_dont_fragment(sk, &rt->dst))) |
1287 | df = htons(IP_DF); | 1314 | df = htons(IP_DF); |
1288 | 1315 | ||
1289 | if (inet->cork.flags & IPCORK_OPT) | 1316 | if (cork->flags & IPCORK_OPT) |
1290 | opt = inet->cork.opt; | 1317 | opt = cork->opt; |
1291 | 1318 | ||
1292 | if (rt->rt_type == RTN_MULTICAST) | 1319 | if (rt->rt_type == RTN_MULTICAST) |
1293 | ttl = inet->mc_ttl; | 1320 | ttl = inet->mc_ttl; |
@@ -1297,17 +1324,18 @@ int ip_push_pending_frames(struct sock *sk) | |||
1297 | iph = (struct iphdr *)skb->data; | 1324 | iph = (struct iphdr *)skb->data; |
1298 | iph->version = 4; | 1325 | iph->version = 4; |
1299 | iph->ihl = 5; | 1326 | iph->ihl = 5; |
1300 | if (opt) { | ||
1301 | iph->ihl += opt->optlen>>2; | ||
1302 | ip_options_build(skb, opt, inet->cork.addr, rt, 0); | ||
1303 | } | ||
1304 | iph->tos = inet->tos; | 1327 | iph->tos = inet->tos; |
1305 | iph->frag_off = df; | 1328 | iph->frag_off = df; |
1306 | ip_select_ident(iph, &rt->dst, sk); | 1329 | ip_select_ident(iph, &rt->dst, sk); |
1307 | iph->ttl = ttl; | 1330 | iph->ttl = ttl; |
1308 | iph->protocol = sk->sk_protocol; | 1331 | iph->protocol = sk->sk_protocol; |
1309 | iph->saddr = rt->rt_src; | 1332 | iph->saddr = fl4->saddr; |
1310 | iph->daddr = rt->rt_dst; | 1333 | iph->daddr = fl4->daddr; |
1334 | |||
1335 | if (opt) { | ||
1336 | iph->ihl += opt->optlen>>2; | ||
1337 | ip_options_build(skb, opt, cork->addr, rt, 0); | ||
1338 | } | ||
1311 | 1339 | ||
1312 | skb->priority = sk->sk_priority; | 1340 | skb->priority = sk->sk_priority; |
1313 | skb->mark = sk->sk_mark; | 1341 | skb->mark = sk->sk_mark; |
@@ -1315,44 +1343,99 @@ int ip_push_pending_frames(struct sock *sk) | |||
1315 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec | 1343 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec |
1316 | * on dst refcount | 1344 | * on dst refcount |
1317 | */ | 1345 | */ |
1318 | inet->cork.dst = NULL; | 1346 | cork->dst = NULL; |
1319 | skb_dst_set(skb, &rt->dst); | 1347 | skb_dst_set(skb, &rt->dst); |
1320 | 1348 | ||
1321 | if (iph->protocol == IPPROTO_ICMP) | 1349 | if (iph->protocol == IPPROTO_ICMP) |
1322 | icmp_out_count(net, ((struct icmphdr *) | 1350 | icmp_out_count(net, ((struct icmphdr *) |
1323 | skb_transport_header(skb))->type); | 1351 | skb_transport_header(skb))->type); |
1324 | 1352 | ||
1325 | /* Netfilter gets whole the not fragmented skb. */ | 1353 | ip_cork_release(cork); |
1354 | out: | ||
1355 | return skb; | ||
1356 | } | ||
1357 | |||
1358 | int ip_send_skb(struct sk_buff *skb) | ||
1359 | { | ||
1360 | struct net *net = sock_net(skb->sk); | ||
1361 | int err; | ||
1362 | |||
1326 | err = ip_local_out(skb); | 1363 | err = ip_local_out(skb); |
1327 | if (err) { | 1364 | if (err) { |
1328 | if (err > 0) | 1365 | if (err > 0) |
1329 | err = net_xmit_errno(err); | 1366 | err = net_xmit_errno(err); |
1330 | if (err) | 1367 | if (err) |
1331 | goto error; | 1368 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
1332 | } | 1369 | } |
1333 | 1370 | ||
1334 | out: | ||
1335 | ip_cork_release(inet); | ||
1336 | return err; | 1371 | return err; |
1372 | } | ||
1337 | 1373 | ||
1338 | error: | 1374 | int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) |
1339 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 1375 | { |
1340 | goto out; | 1376 | struct sk_buff *skb; |
1377 | |||
1378 | skb = ip_finish_skb(sk, fl4); | ||
1379 | if (!skb) | ||
1380 | return 0; | ||
1381 | |||
1382 | /* Netfilter gets whole the not fragmented skb. */ | ||
1383 | return ip_send_skb(skb); | ||
1341 | } | 1384 | } |
1342 | 1385 | ||
1343 | /* | 1386 | /* |
1344 | * Throw away all pending data on the socket. | 1387 | * Throw away all pending data on the socket. |
1345 | */ | 1388 | */ |
1346 | void ip_flush_pending_frames(struct sock *sk) | 1389 | static void __ip_flush_pending_frames(struct sock *sk, |
1390 | struct sk_buff_head *queue, | ||
1391 | struct inet_cork *cork) | ||
1347 | { | 1392 | { |
1348 | struct sk_buff *skb; | 1393 | struct sk_buff *skb; |
1349 | 1394 | ||
1350 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) | 1395 | while ((skb = __skb_dequeue_tail(queue)) != NULL) |
1351 | kfree_skb(skb); | 1396 | kfree_skb(skb); |
1352 | 1397 | ||
1353 | ip_cork_release(inet_sk(sk)); | 1398 | ip_cork_release(cork); |
1354 | } | 1399 | } |
1355 | 1400 | ||
1401 | void ip_flush_pending_frames(struct sock *sk) | ||
1402 | { | ||
1403 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base); | ||
1404 | } | ||
1405 | |||
1406 | struct sk_buff *ip_make_skb(struct sock *sk, | ||
1407 | struct flowi4 *fl4, | ||
1408 | int getfrag(void *from, char *to, int offset, | ||
1409 | int len, int odd, struct sk_buff *skb), | ||
1410 | void *from, int length, int transhdrlen, | ||
1411 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1412 | unsigned int flags) | ||
1413 | { | ||
1414 | struct inet_cork cork; | ||
1415 | struct sk_buff_head queue; | ||
1416 | int err; | ||
1417 | |||
1418 | if (flags & MSG_PROBE) | ||
1419 | return NULL; | ||
1420 | |||
1421 | __skb_queue_head_init(&queue); | ||
1422 | |||
1423 | cork.flags = 0; | ||
1424 | cork.addr = 0; | ||
1425 | cork.opt = NULL; | ||
1426 | err = ip_setup_cork(sk, &cork, ipc, rtp); | ||
1427 | if (err) | ||
1428 | return ERR_PTR(err); | ||
1429 | |||
1430 | err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, | ||
1431 | from, length, transhdrlen, flags); | ||
1432 | if (err) { | ||
1433 | __ip_flush_pending_frames(sk, &queue, &cork); | ||
1434 | return ERR_PTR(err); | ||
1435 | } | ||
1436 | |||
1437 | return __ip_make_skb(sk, fl4, &queue, &cork); | ||
1438 | } | ||
1356 | 1439 | ||
1357 | /* | 1440 | /* |
1358 | * Fetch data from kernel space and fill in checksum if needed. | 1441 | * Fetch data from kernel space and fill in checksum if needed. |
@@ -1374,48 +1457,39 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, | |||
1374 | * Should run single threaded per socket because it uses the sock | 1457 | * Should run single threaded per socket because it uses the sock |
1375 | * structure to pass arguments. | 1458 | * structure to pass arguments. |
1376 | */ | 1459 | */ |
1377 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, | 1460 | void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, |
1378 | unsigned int len) | 1461 | struct ip_reply_arg *arg, unsigned int len) |
1379 | { | 1462 | { |
1380 | struct inet_sock *inet = inet_sk(sk); | 1463 | struct inet_sock *inet = inet_sk(sk); |
1381 | struct { | 1464 | struct ip_options_data replyopts; |
1382 | struct ip_options opt; | ||
1383 | char data[40]; | ||
1384 | } replyopts; | ||
1385 | struct ipcm_cookie ipc; | 1465 | struct ipcm_cookie ipc; |
1386 | __be32 daddr; | 1466 | struct flowi4 fl4; |
1387 | struct rtable *rt = skb_rtable(skb); | 1467 | struct rtable *rt = skb_rtable(skb); |
1388 | 1468 | ||
1389 | if (ip_options_echo(&replyopts.opt, skb)) | 1469 | if (ip_options_echo(&replyopts.opt.opt, skb)) |
1390 | return; | 1470 | return; |
1391 | 1471 | ||
1392 | daddr = ipc.addr = rt->rt_src; | 1472 | ipc.addr = daddr; |
1393 | ipc.opt = NULL; | 1473 | ipc.opt = NULL; |
1394 | ipc.shtx.flags = 0; | 1474 | ipc.tx_flags = 0; |
1395 | 1475 | ||
1396 | if (replyopts.opt.optlen) { | 1476 | if (replyopts.opt.opt.optlen) { |
1397 | ipc.opt = &replyopts.opt; | 1477 | ipc.opt = &replyopts.opt; |
1398 | 1478 | ||
1399 | if (ipc.opt->srr) | 1479 | if (replyopts.opt.opt.srr) |
1400 | daddr = replyopts.opt.faddr; | 1480 | daddr = replyopts.opt.opt.faddr; |
1401 | } | 1481 | } |
1402 | 1482 | ||
1403 | { | 1483 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, |
1404 | struct flowi fl = { .oif = arg->bound_dev_if, | 1484 | RT_TOS(ip_hdr(skb)->tos), |
1405 | .nl_u = { .ip4_u = | 1485 | RT_SCOPE_UNIVERSE, sk->sk_protocol, |
1406 | { .daddr = daddr, | 1486 | ip_reply_arg_flowi_flags(arg), |
1407 | .saddr = rt->rt_spec_dst, | 1487 | daddr, rt->rt_spec_dst, |
1408 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, | 1488 | tcp_hdr(skb)->source, tcp_hdr(skb)->dest); |
1409 | /* Not quite clean, but right. */ | 1489 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
1410 | .uli_u = { .ports = | 1490 | rt = ip_route_output_key(sock_net(sk), &fl4); |
1411 | { .sport = tcp_hdr(skb)->dest, | 1491 | if (IS_ERR(rt)) |
1412 | .dport = tcp_hdr(skb)->source } }, | 1492 | return; |
1413 | .proto = sk->sk_protocol, | ||
1414 | .flags = ip_reply_arg_flowi_flags(arg) }; | ||
1415 | security_skb_classify_flow(skb, &fl); | ||
1416 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) | ||
1417 | return; | ||
1418 | } | ||
1419 | 1493 | ||
1420 | /* And let IP do all the hard work. | 1494 | /* And let IP do all the hard work. |
1421 | 1495 | ||
@@ -1428,7 +1502,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1428 | sk->sk_priority = skb->priority; | 1502 | sk->sk_priority = skb->priority; |
1429 | sk->sk_protocol = ip_hdr(skb)->protocol; | 1503 | sk->sk_protocol = ip_hdr(skb)->protocol; |
1430 | sk->sk_bound_dev_if = arg->bound_dev_if; | 1504 | sk->sk_bound_dev_if = arg->bound_dev_if; |
1431 | ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, | 1505 | ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, |
1432 | &ipc, &rt, MSG_DONTWAIT); | 1506 | &ipc, &rt, MSG_DONTWAIT); |
1433 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { | 1507 | if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
1434 | if (arg->csumoffset >= 0) | 1508 | if (arg->csumoffset >= 0) |
@@ -1436,7 +1510,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1436 | arg->csumoffset) = csum_fold(csum_add(skb->csum, | 1510 | arg->csumoffset) = csum_fold(csum_add(skb->csum, |
1437 | arg->csum)); | 1511 | arg->csum)); |
1438 | skb->ip_summed = CHECKSUM_NONE; | 1512 | skb->ip_summed = CHECKSUM_NONE; |
1439 | ip_push_pending_frames(sk); | 1513 | ip_push_pending_frames(sk, &fl4); |
1440 | } | 1514 | } |
1441 | 1515 | ||
1442 | bh_unlock_sock(sk); | 1516 | bh_unlock_sock(sk); |