Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 347 |
1 file changed, 211 insertions, 136 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 04c7b3ba6b39..459c011b1d4a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -339,25 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
339 | if(opt && opt->srr) | 339 | if(opt && opt->srr) |
340 | daddr = opt->faddr; | 340 | daddr = opt->faddr; |
341 | 341 | ||
342 | { | 342 | /* If this fails, retransmit mechanism of transport layer will |
343 | struct flowi fl = { .oif = sk->sk_bound_dev_if, | 343 | * keep trying until route appears or the connection times |
344 | .mark = sk->sk_mark, | 344 | * itself out. |
345 | .fl4_dst = daddr, | 345 | */ |
346 | .fl4_src = inet->inet_saddr, | 346 | rt = ip_route_output_ports(sock_net(sk), sk, |
347 | .fl4_tos = RT_CONN_FLAGS(sk), | 347 | daddr, inet->inet_saddr, |
348 | .proto = sk->sk_protocol, | 348 | inet->inet_dport, |
349 | .flags = inet_sk_flowi_flags(sk), | 349 | inet->inet_sport, |
350 | .fl_ip_sport = inet->inet_sport, | 350 | sk->sk_protocol, |
351 | .fl_ip_dport = inet->inet_dport }; | 351 | RT_CONN_FLAGS(sk), |
352 | 352 | sk->sk_bound_dev_if); | |
353 | /* If this fails, retransmit mechanism of transport layer will | 353 | if (IS_ERR(rt)) |
354 | * keep trying until route appears or the connection times | 354 | goto no_route; |
355 | * itself out. | ||
356 | */ | ||
357 | security_sk_classify_flow(sk, &fl); | ||
358 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | ||
359 | goto no_route; | ||
360 | } | ||
361 | sk_setup_caps(sk, &rt->dst); | 355 | sk_setup_caps(sk, &rt->dst); |
362 | } | 356 | } |
363 | skb_dst_set_noref(skb, &rt->dst); | 357 | skb_dst_set_noref(skb, &rt->dst); |
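For context, a minimal sketch (not part of the commit) of the calling convention the converted lookup implies: ip_route_output_ports() now hands back either a valid struct rtable or an ERR_PTR()-encoded error, so callers test the returned pointer instead of a separate status code. Variable names below are placeholders.

	/* Sketch only: generic error handling for the pointer-returning
	 * route lookup used in the hunk above.
	 */
	struct rtable *rt;

	rt = ip_route_output_ports(net, sk, daddr, saddr,
				   dport, sport, protocol, tos, oif);
	if (IS_ERR(rt))
		return PTR_ERR(rt);	/* e.g. -ENETUNREACH; no rtable was allocated */
	/* ... use rt, then drop the reference with ip_rt_put(rt) ... */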
@@ -609,7 +603,7 @@ slow_path: | |||
609 | /* IF: it doesn't fit, use 'mtu' - the data space left */ | 603 | /* IF: it doesn't fit, use 'mtu' - the data space left */ |
610 | if (len > mtu) | 604 | if (len > mtu) |
611 | len = mtu; | 605 | len = mtu; |
612 | /* IF: we are not sending upto and including the packet end | 606 | /* IF: we are not sending up to and including the packet end |
613 | then align the next start on an eight byte boundary */ | 607 | then align the next start on an eight byte boundary */ |
614 | if (len < left) { | 608 | if (len < left) { |
615 | len &= ~7; | 609 | len &= ~7; |
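A small worked illustration (drawn from the IPv4 header format, not from the patch): the fragment offset field counts 8-octet units, so every fragment except the last must carry a multiple of eight data bytes, which is what the mask above enforces.

	/* Illustration only: a candidate payload of 1006 bytes, with more
	 * data still left to send, is rounded down so the next fragment's
	 * offset stays representable in 8-octet units.
	 */
	unsigned int len = 1006;
	len &= ~7;		/* 1006 -> 1000; offset advances by 125 units */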
@@ -733,6 +727,7 @@ csum_page(struct page *page, int offset, int copy) | |||
733 | } | 727 | } |
734 | 728 | ||
735 | static inline int ip_ufo_append_data(struct sock *sk, | 729 | static inline int ip_ufo_append_data(struct sock *sk, |
730 | struct sk_buff_head *queue, | ||
736 | int getfrag(void *from, char *to, int offset, int len, | 731 | int getfrag(void *from, char *to, int offset, int len, |
737 | int odd, struct sk_buff *skb), | 732 | int odd, struct sk_buff *skb), |
738 | void *from, int length, int hh_len, int fragheaderlen, | 733 | void *from, int length, int hh_len, int fragheaderlen, |
@@ -745,7 +740,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
745 | * device, so create one single skb packet containing complete | 740 | * device, so create one single skb packet containing complete |
746 | * udp datagram | 741 | * udp datagram |
747 | */ | 742 | */ |
748 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | 743 | if ((skb = skb_peek_tail(queue)) == NULL) { |
749 | skb = sock_alloc_send_skb(sk, | 744 | skb = sock_alloc_send_skb(sk, |
750 | hh_len + fragheaderlen + transhdrlen + 20, | 745 | hh_len + fragheaderlen + transhdrlen + 20, |
751 | (flags & MSG_DONTWAIT), &err); | 746 | (flags & MSG_DONTWAIT), &err); |
@@ -767,40 +762,28 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
767 | 762 | ||
768 | skb->ip_summed = CHECKSUM_PARTIAL; | 763 | skb->ip_summed = CHECKSUM_PARTIAL; |
769 | skb->csum = 0; | 764 | skb->csum = 0; |
770 | sk->sk_sndmsg_off = 0; | ||
771 | 765 | ||
772 | /* specify the length of each IP datagram fragment */ | 766 | /* specify the length of each IP datagram fragment */ |
773 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 767 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
774 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 768 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
775 | __skb_queue_tail(&sk->sk_write_queue, skb); | 769 | __skb_queue_tail(queue, skb); |
776 | } | 770 | } |
777 | 771 | ||
778 | return skb_append_datato_frags(sk, skb, getfrag, from, | 772 | return skb_append_datato_frags(sk, skb, getfrag, from, |
779 | (length - transhdrlen)); | 773 | (length - transhdrlen)); |
780 | } | 774 | } |
781 | 775 | ||
782 | /* | 776 | static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, |
783 | * ip_append_data() and ip_append_page() can make one large IP datagram | 777 | struct inet_cork *cork, |
784 | * from many pieces of data. Each pieces will be holded on the socket | 778 | int getfrag(void *from, char *to, int offset, |
785 | * until ip_push_pending_frames() is called. Each piece can be a page | 779 | int len, int odd, struct sk_buff *skb), |
786 | * or non-page data. | 780 | void *from, int length, int transhdrlen, |
787 | * | 781 | unsigned int flags) |
788 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | ||
789 | * this interface potentially. | ||
790 | * | ||
791 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
792 | */ | ||
793 | int ip_append_data(struct sock *sk, | ||
794 | int getfrag(void *from, char *to, int offset, int len, | ||
795 | int odd, struct sk_buff *skb), | ||
796 | void *from, int length, int transhdrlen, | ||
797 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
798 | unsigned int flags) | ||
799 | { | 782 | { |
800 | struct inet_sock *inet = inet_sk(sk); | 783 | struct inet_sock *inet = inet_sk(sk); |
801 | struct sk_buff *skb; | 784 | struct sk_buff *skb; |
802 | 785 | ||
803 | struct ip_options *opt = NULL; | 786 | struct ip_options *opt = cork->opt; |
804 | int hh_len; | 787 | int hh_len; |
805 | int exthdrlen; | 788 | int exthdrlen; |
806 | int mtu; | 789 | int mtu; |
@@ -809,58 +792,19 @@ int ip_append_data(struct sock *sk, | |||
809 | int offset = 0; | 792 | int offset = 0; |
810 | unsigned int maxfraglen, fragheaderlen; | 793 | unsigned int maxfraglen, fragheaderlen; |
811 | int csummode = CHECKSUM_NONE; | 794 | int csummode = CHECKSUM_NONE; |
812 | struct rtable *rt; | 795 | struct rtable *rt = (struct rtable *)cork->dst; |
813 | 796 | ||
814 | if (flags&MSG_PROBE) | 797 | exthdrlen = transhdrlen ? rt->dst.header_len : 0; |
815 | return 0; | 798 | length += exthdrlen; |
816 | 799 | transhdrlen += exthdrlen; | |
817 | if (skb_queue_empty(&sk->sk_write_queue)) { | 800 | mtu = cork->fragsize; |
818 | /* | ||
819 | * setup for corking. | ||
820 | */ | ||
821 | opt = ipc->opt; | ||
822 | if (opt) { | ||
823 | if (inet->cork.opt == NULL) { | ||
824 | inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); | ||
825 | if (unlikely(inet->cork.opt == NULL)) | ||
826 | return -ENOBUFS; | ||
827 | } | ||
828 | memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); | ||
829 | inet->cork.flags |= IPCORK_OPT; | ||
830 | inet->cork.addr = ipc->addr; | ||
831 | } | ||
832 | rt = *rtp; | ||
833 | if (unlikely(!rt)) | ||
834 | return -EFAULT; | ||
835 | /* | ||
836 | * We steal reference to this route, caller should not release it | ||
837 | */ | ||
838 | *rtp = NULL; | ||
839 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
840 | rt->dst.dev->mtu : | ||
841 | dst_mtu(rt->dst.path); | ||
842 | inet->cork.dst = &rt->dst; | ||
843 | inet->cork.length = 0; | ||
844 | sk->sk_sndmsg_page = NULL; | ||
845 | sk->sk_sndmsg_off = 0; | ||
846 | exthdrlen = rt->dst.header_len; | ||
847 | length += exthdrlen; | ||
848 | transhdrlen += exthdrlen; | ||
849 | } else { | ||
850 | rt = (struct rtable *)inet->cork.dst; | ||
851 | if (inet->cork.flags & IPCORK_OPT) | ||
852 | opt = inet->cork.opt; | ||
853 | 801 | ||
854 | transhdrlen = 0; | ||
855 | exthdrlen = 0; | ||
856 | mtu = inet->cork.fragsize; | ||
857 | } | ||
858 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); | 802 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
859 | 803 | ||
860 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 804 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
861 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 805 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
862 | 806 | ||
863 | if (inet->cork.length + length > 0xFFFF - fragheaderlen) { | 807 | if (cork->length + length > 0xFFFF - fragheaderlen) { |
864 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 808 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
865 | mtu-exthdrlen); | 809 | mtu-exthdrlen); |
866 | return -EMSGSIZE; | 810 | return -EMSGSIZE; |
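The arithmetic behind the size check above, shown as a sketch that is not part of the diff and assumes a bare 20-byte header with no IP options: the 16-bit tot_len field caps an IPv4 datagram at 0xFFFF bytes, so the already-corked payload plus the new chunk must leave room for the header.

	unsigned int fragheaderlen = sizeof(struct iphdr);	/* 20, no options   */
	unsigned int max_payload   = 0xFFFF - fragheaderlen;	/* 65515 data bytes */

	/* cork->length + length must stay at or below max_payload,
	 * otherwise the append fails with -EMSGSIZE as in the hunk above.
	 */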
@@ -876,15 +820,15 @@ int ip_append_data(struct sock *sk, | |||
876 | !exthdrlen) | 820 | !exthdrlen) |
877 | csummode = CHECKSUM_PARTIAL; | 821 | csummode = CHECKSUM_PARTIAL; |
878 | 822 | ||
879 | skb = skb_peek_tail(&sk->sk_write_queue); | 823 | skb = skb_peek_tail(queue); |
880 | 824 | ||
881 | inet->cork.length += length; | 825 | cork->length += length; |
882 | if (((length > mtu) || (skb && skb_is_gso(skb))) && | 826 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
883 | (sk->sk_protocol == IPPROTO_UDP) && | 827 | (sk->sk_protocol == IPPROTO_UDP) && |
884 | (rt->dst.dev->features & NETIF_F_UFO)) { | 828 | (rt->dst.dev->features & NETIF_F_UFO)) { |
885 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 829 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, |
886 | fragheaderlen, transhdrlen, mtu, | 830 | hh_len, fragheaderlen, transhdrlen, |
887 | flags); | 831 | mtu, flags); |
888 | if (err) | 832 | if (err) |
889 | goto error; | 833 | goto error; |
890 | return 0; | 834 | return 0; |
@@ -961,7 +905,7 @@ alloc_new_skb: | |||
961 | else | 905 | else |
962 | /* only the initial fragment is | 906 | /* only the initial fragment is |
963 | time stamped */ | 907 | time stamped */ |
964 | ipc->tx_flags = 0; | 908 | cork->tx_flags = 0; |
965 | } | 909 | } |
966 | if (skb == NULL) | 910 | if (skb == NULL) |
967 | goto error; | 911 | goto error; |
@@ -972,7 +916,7 @@ alloc_new_skb: | |||
972 | skb->ip_summed = csummode; | 916 | skb->ip_summed = csummode; |
973 | skb->csum = 0; | 917 | skb->csum = 0; |
974 | skb_reserve(skb, hh_len); | 918 | skb_reserve(skb, hh_len); |
975 | skb_shinfo(skb)->tx_flags = ipc->tx_flags; | 919 | skb_shinfo(skb)->tx_flags = cork->tx_flags; |
976 | 920 | ||
977 | /* | 921 | /* |
978 | * Find where to start putting bytes. | 922 | * Find where to start putting bytes. |
@@ -1009,7 +953,7 @@ alloc_new_skb: | |||
1009 | /* | 953 | /* |
1010 | * Put the packet on the pending queue. | 954 | * Put the packet on the pending queue. |
1011 | */ | 955 | */ |
1012 | __skb_queue_tail(&sk->sk_write_queue, skb); | 956 | __skb_queue_tail(queue, skb); |
1013 | continue; | 957 | continue; |
1014 | } | 958 | } |
1015 | 959 | ||
@@ -1029,8 +973,8 @@ alloc_new_skb: | |||
1029 | } else { | 973 | } else { |
1030 | int i = skb_shinfo(skb)->nr_frags; | 974 | int i = skb_shinfo(skb)->nr_frags; |
1031 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; | 975 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; |
1032 | struct page *page = sk->sk_sndmsg_page; | 976 | struct page *page = cork->page; |
1033 | int off = sk->sk_sndmsg_off; | 977 | int off = cork->off; |
1034 | unsigned int left; | 978 | unsigned int left; |
1035 | 979 | ||
1036 | if (page && (left = PAGE_SIZE - off) > 0) { | 980 | if (page && (left = PAGE_SIZE - off) > 0) { |
@@ -1042,7 +986,7 @@ alloc_new_skb: | |||
1042 | goto error; | 986 | goto error; |
1043 | } | 987 | } |
1044 | get_page(page); | 988 | get_page(page); |
1045 | skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); | 989 | skb_fill_page_desc(skb, i, page, off, 0); |
1046 | frag = &skb_shinfo(skb)->frags[i]; | 990 | frag = &skb_shinfo(skb)->frags[i]; |
1047 | } | 991 | } |
1048 | } else if (i < MAX_SKB_FRAGS) { | 992 | } else if (i < MAX_SKB_FRAGS) { |
@@ -1053,8 +997,8 @@ alloc_new_skb: | |||
1053 | err = -ENOMEM; | 997 | err = -ENOMEM; |
1054 | goto error; | 998 | goto error; |
1055 | } | 999 | } |
1056 | sk->sk_sndmsg_page = page; | 1000 | cork->page = page; |
1057 | sk->sk_sndmsg_off = 0; | 1001 | cork->off = 0; |
1058 | 1002 | ||
1059 | skb_fill_page_desc(skb, i, page, 0, 0); | 1003 | skb_fill_page_desc(skb, i, page, 0, 0); |
1060 | frag = &skb_shinfo(skb)->frags[i]; | 1004 | frag = &skb_shinfo(skb)->frags[i]; |
@@ -1066,7 +1010,7 @@ alloc_new_skb: | |||
1066 | err = -EFAULT; | 1010 | err = -EFAULT; |
1067 | goto error; | 1011 | goto error; |
1068 | } | 1012 | } |
1069 | sk->sk_sndmsg_off += copy; | 1013 | cork->off += copy; |
1070 | frag->size += copy; | 1014 | frag->size += copy; |
1071 | skb->len += copy; | 1015 | skb->len += copy; |
1072 | skb->data_len += copy; | 1016 | skb->data_len += copy; |
@@ -1080,11 +1024,87 @@ alloc_new_skb: | |||
1080 | return 0; | 1024 | return 0; |
1081 | 1025 | ||
1082 | error: | 1026 | error: |
1083 | inet->cork.length -= length; | 1027 | cork->length -= length; |
1084 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); | 1028 | IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); |
1085 | return err; | 1029 | return err; |
1086 | } | 1030 | } |
1087 | 1031 | ||
1032 | static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, | ||
1033 | struct ipcm_cookie *ipc, struct rtable **rtp) | ||
1034 | { | ||
1035 | struct inet_sock *inet = inet_sk(sk); | ||
1036 | struct ip_options *opt; | ||
1037 | struct rtable *rt; | ||
1038 | |||
1039 | /* | ||
1040 | * setup for corking. | ||
1041 | */ | ||
1042 | opt = ipc->opt; | ||
1043 | if (opt) { | ||
1044 | if (cork->opt == NULL) { | ||
1045 | cork->opt = kmalloc(sizeof(struct ip_options) + 40, | ||
1046 | sk->sk_allocation); | ||
1047 | if (unlikely(cork->opt == NULL)) | ||
1048 | return -ENOBUFS; | ||
1049 | } | ||
1050 | memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); | ||
1051 | cork->flags |= IPCORK_OPT; | ||
1052 | cork->addr = ipc->addr; | ||
1053 | } | ||
1054 | rt = *rtp; | ||
1055 | if (unlikely(!rt)) | ||
1056 | return -EFAULT; | ||
1057 | /* | ||
1058 | * We steal reference to this route, caller should not release it | ||
1059 | */ | ||
1060 | *rtp = NULL; | ||
1061 | cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? | ||
1062 | rt->dst.dev->mtu : dst_mtu(rt->dst.path); | ||
1063 | cork->dst = &rt->dst; | ||
1064 | cork->length = 0; | ||
1065 | cork->tx_flags = ipc->tx_flags; | ||
1066 | cork->page = NULL; | ||
1067 | cork->off = 0; | ||
1068 | |||
1069 | return 0; | ||
1070 | } | ||
1071 | |||
1072 | /* | ||
1073 | * ip_append_data() and ip_append_page() can make one large IP datagram | ||
1074 | * from many pieces of data. Each pieces will be holded on the socket | ||
1075 | * until ip_push_pending_frames() is called. Each piece can be a page | ||
1076 | * or non-page data. | ||
1077 | * | ||
1078 | * Not only UDP, other transport protocols - e.g. raw sockets - can use | ||
1079 | * this interface potentially. | ||
1080 | * | ||
1081 | * LATER: length must be adjusted by pad at tail, when it is required. | ||
1082 | */ | ||
1083 | int ip_append_data(struct sock *sk, | ||
1084 | int getfrag(void *from, char *to, int offset, int len, | ||
1085 | int odd, struct sk_buff *skb), | ||
1086 | void *from, int length, int transhdrlen, | ||
1087 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1088 | unsigned int flags) | ||
1089 | { | ||
1090 | struct inet_sock *inet = inet_sk(sk); | ||
1091 | int err; | ||
1092 | |||
1093 | if (flags&MSG_PROBE) | ||
1094 | return 0; | ||
1095 | |||
1096 | if (skb_queue_empty(&sk->sk_write_queue)) { | ||
1097 | err = ip_setup_cork(sk, &inet->cork, ipc, rtp); | ||
1098 | if (err) | ||
1099 | return err; | ||
1100 | } else { | ||
1101 | transhdrlen = 0; | ||
1102 | } | ||
1103 | |||
1104 | return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, | ||
1105 | from, length, transhdrlen, flags); | ||
1106 | } | ||
1107 | |||
1088 | ssize_t ip_append_page(struct sock *sk, struct page *page, | 1108 | ssize_t ip_append_page(struct sock *sk, struct page *page, |
1089 | int offset, size_t size, int flags) | 1109 | int offset, size_t size, int flags) |
1090 | { | 1110 | { |
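A hedged sketch of the classic corked sequence that ip_append_data() continues to serve after the split into ip_setup_cork() and __ip_append_data(); the caller below is invented for illustration, only the ip_append_data()/ip_push_pending_frames()/ip_flush_pending_frames() calls come from this file.

	/* Hypothetical caller, not from this commit: queue data on
	 * sk->sk_write_queue, then flush it out as one datagram.
	 */
	static int example_corked_send(struct sock *sk,
				       int getfrag(void *from, char *to, int offset,
						   int len, int odd, struct sk_buff *skb),
				       void *from, int length,
				       struct ipcm_cookie *ipc, struct rtable **rtp)
	{
		int err;

		lock_sock(sk);
		err = ip_append_data(sk, getfrag, from, length, 0, ipc, rtp, 0);
		if (err)
			ip_flush_pending_frames(sk);	  /* drop queued fragments */
		else
			err = ip_push_pending_frames(sk); /* build and transmit    */
		release_sock(sk);
		return err;
	}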
@@ -1228,40 +1248,41 @@ error: | |||
1228 | return err; | 1248 | return err; |
1229 | } | 1249 | } |
1230 | 1250 | ||
1231 | static void ip_cork_release(struct inet_sock *inet) | 1251 | static void ip_cork_release(struct inet_cork *cork) |
1232 | { | 1252 | { |
1233 | inet->cork.flags &= ~IPCORK_OPT; | 1253 | cork->flags &= ~IPCORK_OPT; |
1234 | kfree(inet->cork.opt); | 1254 | kfree(cork->opt); |
1235 | inet->cork.opt = NULL; | 1255 | cork->opt = NULL; |
1236 | dst_release(inet->cork.dst); | 1256 | dst_release(cork->dst); |
1237 | inet->cork.dst = NULL; | 1257 | cork->dst = NULL; |
1238 | } | 1258 | } |
1239 | 1259 | ||
1240 | /* | 1260 | /* |
1241 | * Combined all pending IP fragments on the socket as one IP datagram | 1261 | * Combined all pending IP fragments on the socket as one IP datagram |
1242 | * and push them out. | 1262 | * and push them out. |
1243 | */ | 1263 | */ |
1244 | int ip_push_pending_frames(struct sock *sk) | 1264 | struct sk_buff *__ip_make_skb(struct sock *sk, |
1265 | struct sk_buff_head *queue, | ||
1266 | struct inet_cork *cork) | ||
1245 | { | 1267 | { |
1246 | struct sk_buff *skb, *tmp_skb; | 1268 | struct sk_buff *skb, *tmp_skb; |
1247 | struct sk_buff **tail_skb; | 1269 | struct sk_buff **tail_skb; |
1248 | struct inet_sock *inet = inet_sk(sk); | 1270 | struct inet_sock *inet = inet_sk(sk); |
1249 | struct net *net = sock_net(sk); | 1271 | struct net *net = sock_net(sk); |
1250 | struct ip_options *opt = NULL; | 1272 | struct ip_options *opt = NULL; |
1251 | struct rtable *rt = (struct rtable *)inet->cork.dst; | 1273 | struct rtable *rt = (struct rtable *)cork->dst; |
1252 | struct iphdr *iph; | 1274 | struct iphdr *iph; |
1253 | __be16 df = 0; | 1275 | __be16 df = 0; |
1254 | __u8 ttl; | 1276 | __u8 ttl; |
1255 | int err = 0; | ||
1256 | 1277 | ||
1257 | if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) | 1278 | if ((skb = __skb_dequeue(queue)) == NULL) |
1258 | goto out; | 1279 | goto out; |
1259 | tail_skb = &(skb_shinfo(skb)->frag_list); | 1280 | tail_skb = &(skb_shinfo(skb)->frag_list); |
1260 | 1281 | ||
1261 | /* move skb->data to ip header from ext header */ | 1282 | /* move skb->data to ip header from ext header */ |
1262 | if (skb->data < skb_network_header(skb)) | 1283 | if (skb->data < skb_network_header(skb)) |
1263 | __skb_pull(skb, skb_network_offset(skb)); | 1284 | __skb_pull(skb, skb_network_offset(skb)); |
1264 | while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { | 1285 | while ((tmp_skb = __skb_dequeue(queue)) != NULL) { |
1265 | __skb_pull(tmp_skb, skb_network_header_len(skb)); | 1286 | __skb_pull(tmp_skb, skb_network_header_len(skb)); |
1266 | *tail_skb = tmp_skb; | 1287 | *tail_skb = tmp_skb; |
1267 | tail_skb = &(tmp_skb->next); | 1288 | tail_skb = &(tmp_skb->next); |
@@ -1287,8 +1308,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
1287 | ip_dont_fragment(sk, &rt->dst))) | 1308 | ip_dont_fragment(sk, &rt->dst))) |
1288 | df = htons(IP_DF); | 1309 | df = htons(IP_DF); |
1289 | 1310 | ||
1290 | if (inet->cork.flags & IPCORK_OPT) | 1311 | if (cork->flags & IPCORK_OPT) |
1291 | opt = inet->cork.opt; | 1312 | opt = cork->opt; |
1292 | 1313 | ||
1293 | if (rt->rt_type == RTN_MULTICAST) | 1314 | if (rt->rt_type == RTN_MULTICAST) |
1294 | ttl = inet->mc_ttl; | 1315 | ttl = inet->mc_ttl; |
@@ -1300,7 +1321,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1300 | iph->ihl = 5; | 1321 | iph->ihl = 5; |
1301 | if (opt) { | 1322 | if (opt) { |
1302 | iph->ihl += opt->optlen>>2; | 1323 | iph->ihl += opt->optlen>>2; |
1303 | ip_options_build(skb, opt, inet->cork.addr, rt, 0); | 1324 | ip_options_build(skb, opt, cork->addr, rt, 0); |
1304 | } | 1325 | } |
1305 | iph->tos = inet->tos; | 1326 | iph->tos = inet->tos; |
1306 | iph->frag_off = df; | 1327 | iph->frag_off = df; |
@@ -1316,44 +1337,95 @@ int ip_push_pending_frames(struct sock *sk) | |||
1316 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec | 1337 | * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec |
1317 | * on dst refcount | 1338 | * on dst refcount |
1318 | */ | 1339 | */ |
1319 | inet->cork.dst = NULL; | 1340 | cork->dst = NULL; |
1320 | skb_dst_set(skb, &rt->dst); | 1341 | skb_dst_set(skb, &rt->dst); |
1321 | 1342 | ||
1322 | if (iph->protocol == IPPROTO_ICMP) | 1343 | if (iph->protocol == IPPROTO_ICMP) |
1323 | icmp_out_count(net, ((struct icmphdr *) | 1344 | icmp_out_count(net, ((struct icmphdr *) |
1324 | skb_transport_header(skb))->type); | 1345 | skb_transport_header(skb))->type); |
1325 | 1346 | ||
1326 | /* Netfilter gets whole the not fragmented skb. */ | 1347 | ip_cork_release(cork); |
1348 | out: | ||
1349 | return skb; | ||
1350 | } | ||
1351 | |||
1352 | int ip_send_skb(struct sk_buff *skb) | ||
1353 | { | ||
1354 | struct net *net = sock_net(skb->sk); | ||
1355 | int err; | ||
1356 | |||
1327 | err = ip_local_out(skb); | 1357 | err = ip_local_out(skb); |
1328 | if (err) { | 1358 | if (err) { |
1329 | if (err > 0) | 1359 | if (err > 0) |
1330 | err = net_xmit_errno(err); | 1360 | err = net_xmit_errno(err); |
1331 | if (err) | 1361 | if (err) |
1332 | goto error; | 1362 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
1333 | } | 1363 | } |
1334 | 1364 | ||
1335 | out: | ||
1336 | ip_cork_release(inet); | ||
1337 | return err; | 1365 | return err; |
1366 | } | ||
1338 | 1367 | ||
1339 | error: | 1368 | int ip_push_pending_frames(struct sock *sk) |
1340 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 1369 | { |
1341 | goto out; | 1370 | struct sk_buff *skb; |
1371 | |||
1372 | skb = ip_finish_skb(sk); | ||
1373 | if (!skb) | ||
1374 | return 0; | ||
1375 | |||
1376 | /* Netfilter gets whole the not fragmented skb. */ | ||
1377 | return ip_send_skb(skb); | ||
1342 | } | 1378 | } |
1343 | 1379 | ||
1344 | /* | 1380 | /* |
1345 | * Throw away all pending data on the socket. | 1381 | * Throw away all pending data on the socket. |
1346 | */ | 1382 | */ |
1347 | void ip_flush_pending_frames(struct sock *sk) | 1383 | static void __ip_flush_pending_frames(struct sock *sk, |
1384 | struct sk_buff_head *queue, | ||
1385 | struct inet_cork *cork) | ||
1348 | { | 1386 | { |
1349 | struct sk_buff *skb; | 1387 | struct sk_buff *skb; |
1350 | 1388 | ||
1351 | while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) | 1389 | while ((skb = __skb_dequeue_tail(queue)) != NULL) |
1352 | kfree_skb(skb); | 1390 | kfree_skb(skb); |
1353 | 1391 | ||
1354 | ip_cork_release(inet_sk(sk)); | 1392 | ip_cork_release(cork); |
1393 | } | ||
1394 | |||
1395 | void ip_flush_pending_frames(struct sock *sk) | ||
1396 | { | ||
1397 | __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); | ||
1355 | } | 1398 | } |
1356 | 1399 | ||
1400 | struct sk_buff *ip_make_skb(struct sock *sk, | ||
1401 | int getfrag(void *from, char *to, int offset, | ||
1402 | int len, int odd, struct sk_buff *skb), | ||
1403 | void *from, int length, int transhdrlen, | ||
1404 | struct ipcm_cookie *ipc, struct rtable **rtp, | ||
1405 | unsigned int flags) | ||
1406 | { | ||
1407 | struct inet_cork cork = {}; | ||
1408 | struct sk_buff_head queue; | ||
1409 | int err; | ||
1410 | |||
1411 | if (flags & MSG_PROBE) | ||
1412 | return NULL; | ||
1413 | |||
1414 | __skb_queue_head_init(&queue); | ||
1415 | |||
1416 | err = ip_setup_cork(sk, &cork, ipc, rtp); | ||
1417 | if (err) | ||
1418 | return ERR_PTR(err); | ||
1419 | |||
1420 | err = __ip_append_data(sk, &queue, &cork, getfrag, | ||
1421 | from, length, transhdrlen, flags); | ||
1422 | if (err) { | ||
1423 | __ip_flush_pending_frames(sk, &queue, &cork); | ||
1424 | return ERR_PTR(err); | ||
1425 | } | ||
1426 | |||
1427 | return __ip_make_skb(sk, &queue, &cork); | ||
1428 | } | ||
1357 | 1429 | ||
1358 | /* | 1430 | /* |
1359 | * Fetch data from kernel space and fill in checksum if needed. | 1431 | * Fetch data from kernel space and fill in checksum if needed. |
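Outside the patch itself, a hedged sketch of the path the new ip_make_skb()/ip_send_skb() pair enables: building a single datagram on a private queue without touching sk->sk_write_queue or the socket cork. The caller below is hypothetical; the getfrag callback shape is the one used throughout this file.

	/* Hypothetical uncorked caller, not part of this commit. */
	static int example_uncorked_send(struct sock *sk,
					 int getfrag(void *from, char *to, int offset,
						     int len, int odd, struct sk_buff *skb),
					 void *from, int length,
					 struct ipcm_cookie *ipc, struct rtable **rtp)
	{
		struct sk_buff *skb;

		skb = ip_make_skb(sk, getfrag, from, length, 0, ipc, rtp, 0);
		if (!skb || IS_ERR(skb))
			return PTR_ERR(skb);	/* NULL (MSG_PROBE) maps to 0 */

		return ip_send_skb(skb);
	}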
@@ -1402,16 +1474,19 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1402 | } | 1474 | } |
1403 | 1475 | ||
1404 | { | 1476 | { |
1405 | struct flowi fl = { .oif = arg->bound_dev_if, | 1477 | struct flowi4 fl4 = { |
1406 | .fl4_dst = daddr, | 1478 | .flowi4_oif = arg->bound_dev_if, |
1407 | .fl4_src = rt->rt_spec_dst, | 1479 | .daddr = daddr, |
1408 | .fl4_tos = RT_TOS(ip_hdr(skb)->tos), | 1480 | .saddr = rt->rt_spec_dst, |
1409 | .fl_ip_sport = tcp_hdr(skb)->dest, | 1481 | .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), |
1410 | .fl_ip_dport = tcp_hdr(skb)->source, | 1482 | .fl4_sport = tcp_hdr(skb)->dest, |
1411 | .proto = sk->sk_protocol, | 1483 | .fl4_dport = tcp_hdr(skb)->source, |
1412 | .flags = ip_reply_arg_flowi_flags(arg) }; | 1484 | .flowi4_proto = sk->sk_protocol, |
1413 | security_skb_classify_flow(skb, &fl); | 1485 | .flowi4_flags = ip_reply_arg_flowi_flags(arg), |
1414 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) | 1486 | }; |
1487 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | ||
1488 | rt = ip_route_output_key(sock_net(sk), &fl4); | ||
1489 | if (IS_ERR(rt)) | ||
1415 | return; | 1490 | return; |
1416 | } | 1491 | } |
1417 | 1492 | ||