author     Al Viro <viro@zeniv.linux.org.uk>    2014-11-28 13:40:20 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>    2015-02-04 01:34:14 -0500
commit     57be5bdad759b9dde8b0d0cc630782a1a4ac4b9f
tree       12d1b9c40bd20aa5e5038382fd20da05f09b2881
parent     cacdc7d2f9fa42e29b650e2879df42ea7d7833c1
ip: convert tcp_sendmsg() to iov_iter primitives
patch is actually smaller than it seems to be - most of it is unindenting
the inner loop body in tcp_sendmsg() itself...
the bit in tcp_input.c is going to get reverted very soon - that's what
memcpy_from_msg() will become, but not in this commit; let's keep it
reasonably contained...
There's one potentially subtle change here: in case of short copy from
userland, mainline tcp_send_syn_data() discards the skb it has allocated
and falls back to normal path, where we'll send as much as possible after
rereading the same data again. This patch trims SYN+data skb instead -
that way we don't need to copy from the same place twice.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
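
A note on the iov_iter semantics the patch relies on: an iov_iter carries its own cursor, so a primitive such as copy_from_iter() reports how many bytes it actually copied and leaves the iterator positioned right after them. That is why the Fast Open path can trim the SYN+data skb to whatever was copied instead of rereading the same user data. The sketch below is a minimal userspace model of that idea, not kernel code; the iter struct and copy_from() helper are invented purely for illustration.

#include <stdio.h>
#include <string.h>

/* Toy stand-in for an iov_iter: a cursor plus a remaining byte count. */
struct iter {
	const char *p;
	size_t count;
};

/* Copy up to len bytes and advance the cursor; returns how much was
 * actually copied, in the same spirit as copy_from_iter().
 */
static size_t copy_from(void *to, size_t len, struct iter *it)
{
	size_t n = len < it->count ? len : it->count;

	memcpy(to, it->p, n);
	it->p += n;
	it->count -= n;
	return n;
}

int main(void)
{
	char payload[] = "data supplied with the SYN";
	struct iter it = { payload, sizeof(payload) - 1 };
	char skb[64];		/* pretend this is the room in the skb */
	size_t space = sizeof(skb);

	size_t copied = copy_from(skb, space, &it);
	if (copied != space) {
		/* Short copy: instead of discarding the buffer and rereading
		 * from the start (the old fallback), keep what we got and
		 * shrink the claimed length -- the "trim".
		 */
		space = copied;
	}
	printf("queued %zu bytes, %zu left in the iterator\n", space, it.count);
	return 0;
}

In the patch itself the same pattern shows up in tcp_send_syn_data(): a copy that gets nothing still falls back to a plain SYN, while a partial copy keeps the skb and shrinks space to the copied length.
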
-rw-r--r--  include/net/sock.h    |  18
-rw-r--r--  net/ipv4/tcp.c        | 233
-rw-r--r--  net/ipv4/tcp_input.c  |   2
-rw-r--r--  net/ipv4/tcp_output.c |  11
4 files changed, 123 insertions(+), 141 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 15341499786c..1e45e599a3ab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 }
 
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
-					   char __user *from, char *to,
+					   struct iov_iter *from, char *to,
 					   int copy, int offset)
 {
 	if (skb->ip_summed == CHECKSUM_NONE) {
-		int err = 0;
-		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
-		if (err)
-			return err;
+		__wsum csum = 0;
+		if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
+			return -EFAULT;
 		skb->csum = csum_block_add(skb->csum, csum, offset);
 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
-		if (!access_ok(VERIFY_READ, from, copy) ||
-		    __copy_from_user_nocache(to, from, copy))
+		if (copy_from_iter_nocache(to, copy, from) != copy)
 			return -EFAULT;
-	} else if (copy_from_user(to, from, copy))
+	} else if (copy_from_iter(to, copy, from) != copy)
 		return -EFAULT;
 
 	return 0;
 }
 
 static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
-				       char __user *from, int copy)
+				       struct iov_iter *from, int copy)
 {
 	int err, offset = skb->len;
 
@@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
 	return err;
 }
 
-static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
+static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
 					   struct sk_buff *skb,
 					   struct page *page,
 					   int off, int copy)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3075723c729b..9d72a0fcd928 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	const struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int iovlen, flags, err, copied = 0;
-	int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
+	int flags, err, copied = 0;
+	int mss_now = 0, size_goal, copied_syn = 0;
 	bool sg;
 	long timeo;
 
@@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			goto out;
 		else if (err)
 			goto out_err;
-		offset = copied_syn;
 	}
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
 	/* Ok commence sending. */
-	iovlen = msg->msg_iter.nr_segs;
-	iov = msg->msg_iter.iov;
 	copied = 0;
 
 	err = -EPIPE;
@@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
-	while (--iovlen >= 0) {
-		size_t seglen = iov->iov_len;
-		unsigned char __user *from = iov->iov_base;
+	while (iov_iter_count(&msg->msg_iter)) {
+		int copy = 0;
+		int max = size_goal;
 
-		iov++;
-		if (unlikely(offset > 0)) {  /* Skip bytes copied in SYN */
-			if (offset >= seglen) {
-				offset -= seglen;
-				continue;
-			}
-			seglen -= offset;
-			from += offset;
-			offset = 0;
+		skb = tcp_write_queue_tail(sk);
+		if (tcp_send_head(sk)) {
+			if (skb->ip_summed == CHECKSUM_NONE)
+				max = mss_now;
+			copy = max - skb->len;
 		}
 
-		while (seglen > 0) {
-			int copy = 0;
-			int max = size_goal;
-
-			skb = tcp_write_queue_tail(sk);
-			if (tcp_send_head(sk)) {
-				if (skb->ip_summed == CHECKSUM_NONE)
-					max = mss_now;
-				copy = max - skb->len;
-			}
-
-			if (copy <= 0) {
+		if (copy <= 0) {
 new_segment:
-				/* Allocate new segment. If the interface is SG,
-				 * allocate skb fitting to single page.
-				 */
-				if (!sk_stream_memory_free(sk))
-					goto wait_for_sndbuf;
+			/* Allocate new segment. If the interface is SG,
+			 * allocate skb fitting to single page.
+			 */
+			if (!sk_stream_memory_free(sk))
+				goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_skb(sk,
-							  select_size(sk, sg),
-							  sk->sk_allocation);
-				if (!skb)
-					goto wait_for_memory;
+			skb = sk_stream_alloc_skb(sk,
+						  select_size(sk, sg),
+						  sk->sk_allocation);
+			if (!skb)
+				goto wait_for_memory;
 
-				/*
-				 * Check whether we can use HW checksum.
-				 */
-				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_PARTIAL;
+			/*
+			 * Check whether we can use HW checksum.
+			 */
+			if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
+				skb->ip_summed = CHECKSUM_PARTIAL;
 
-				skb_entail(sk, skb);
-				copy = size_goal;
-				max = size_goal;
+			skb_entail(sk, skb);
+			copy = size_goal;
+			max = size_goal;
 
-				/* All packets are restored as if they have
-				 * already been sent. skb_mstamp isn't set to
-				 * avoid wrong rtt estimation.
-				 */
-				if (tp->repair)
-					TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
-			}
+			/* All packets are restored as if they have
+			 * already been sent. skb_mstamp isn't set to
+			 * avoid wrong rtt estimation.
+			 */
+			if (tp->repair)
+				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
+		}
 
-			/* Try to append data to the end of skb. */
-			if (copy > seglen)
-				copy = seglen;
+		/* Try to append data to the end of skb. */
+		if (copy > iov_iter_count(&msg->msg_iter))
+			copy = iov_iter_count(&msg->msg_iter);
 
-			/* Where to copy to? */
-			if (skb_availroom(skb) > 0) {
-				/* We have some space in skb head. Superb! */
-				copy = min_t(int, copy, skb_availroom(skb));
-				err = skb_add_data_nocache(sk, skb, from, copy);
-				if (err)
-					goto do_fault;
-			} else {
-				bool merge = true;
-				int i = skb_shinfo(skb)->nr_frags;
-				struct page_frag *pfrag = sk_page_frag(sk);
+		/* Where to copy to? */
+		if (skb_availroom(skb) > 0) {
+			/* We have some space in skb head. Superb! */
+			copy = min_t(int, copy, skb_availroom(skb));
+			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
+			if (err)
+				goto do_fault;
+		} else {
+			bool merge = true;
+			int i = skb_shinfo(skb)->nr_frags;
+			struct page_frag *pfrag = sk_page_frag(sk);
 
-				if (!sk_page_frag_refill(sk, pfrag))
-					goto wait_for_memory;
-
-				if (!skb_can_coalesce(skb, i, pfrag->page,
-						      pfrag->offset)) {
-					if (i == MAX_SKB_FRAGS || !sg) {
-						tcp_mark_push(tp, skb);
-						goto new_segment;
-					}
-					merge = false;
-				}
+			if (!sk_page_frag_refill(sk, pfrag))
+				goto wait_for_memory;
 
-				copy = min_t(int, copy, pfrag->size - pfrag->offset);
-
-				if (!sk_wmem_schedule(sk, copy))
-					goto wait_for_memory;
-
-				err = skb_copy_to_page_nocache(sk, from, skb,
-							       pfrag->page,
-							       pfrag->offset,
-							       copy);
-				if (err)
-					goto do_error;
-
-				/* Update the skb. */
-				if (merge) {
-					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-				} else {
-					skb_fill_page_desc(skb, i, pfrag->page,
-							   pfrag->offset, copy);
-					get_page(pfrag->page);
+			if (!skb_can_coalesce(skb, i, pfrag->page,
+					      pfrag->offset)) {
+				if (i == MAX_SKB_FRAGS || !sg) {
+					tcp_mark_push(tp, skb);
+					goto new_segment;
 				}
-				pfrag->offset += copy;
+				merge = false;
 			}
 
-			if (!copied)
-				TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-			tp->write_seq += copy;
-			TCP_SKB_CB(skb)->end_seq += copy;
-			tcp_skb_pcount_set(skb, 0);
+			if (!sk_wmem_schedule(sk, copy))
+				goto wait_for_memory;
 
-			from += copy;
-			copied += copy;
-			if ((seglen -= copy) == 0 && iovlen == 0) {
-				tcp_tx_timestamp(sk, skb);
-				goto out;
+			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+						       pfrag->page,
+						       pfrag->offset,
+						       copy);
+			if (err)
+				goto do_error;
+
+			/* Update the skb. */
+			if (merge) {
+				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+			} else {
+				skb_fill_page_desc(skb, i, pfrag->page,
+						   pfrag->offset, copy);
+				get_page(pfrag->page);
 			}
+			pfrag->offset += copy;
+		}
 
-			if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
-				continue;
+		if (!copied)
+			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+
+		tp->write_seq += copy;
+		TCP_SKB_CB(skb)->end_seq += copy;
+		tcp_skb_pcount_set(skb, 0);
+
+		copied += copy;
+		if (!iov_iter_count(&msg->msg_iter)) {
+			tcp_tx_timestamp(sk, skb);
+			goto out;
+		}
 
-			if (forced_push(tp)) {
-				tcp_mark_push(tp, skb);
-				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
-			} else if (skb == tcp_send_head(sk))
-				tcp_push_one(sk, mss_now);
+		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
 			continue;
 
+		if (forced_push(tp)) {
+			tcp_mark_push(tp, skb);
+			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+		} else if (skb == tcp_send_head(sk))
+			tcp_push_one(sk, mss_now);
+		continue;
+
 wait_for_sndbuf:
-			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-			if (copied)
-				tcp_push(sk, flags & ~MSG_MORE, mss_now,
-					 TCP_NAGLE_PUSH, size_goal);
+		if (copied)
+			tcp_push(sk, flags & ~MSG_MORE, mss_now,
+				 TCP_NAGLE_PUSH, size_goal);
 
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
-				goto do_error;
+		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			goto do_error;
 
-			mss_now = tcp_send_mss(sk, &size_goal, flags);
-		}
+		mss_now = tcp_send_mss(sk, &size_goal, flags);
 	}
 
 out:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71fb37c70581..93c74829cbce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20ab06b228ac..722c8bceaf9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0;
+	int syn_loss = 0, space, err = 0, copied;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
 
@@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-					 fo->data->msg_iter.iov, 0, space))) {
+	copied = copy_from_iter(skb_put(syn_data, space), space,
+				&fo->data->msg_iter);
+	if (unlikely(!copied)) {
 		kfree_skb(syn_data);
 		goto fallback;
 	}
+	if (copied != space) {
+		skb_trim(syn_data, copied);
+		space = copied;
+	}
 
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
