author     Al Viro <viro@zeniv.linux.org.uk>   2014-11-28 13:40:20 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>   2015-02-04 01:34:14 -0500
commit     57be5bdad759b9dde8b0d0cc630782a1a4ac4b9f (patch)
tree       12d1b9c40bd20aa5e5038382fd20da05f09b2881
parent     cacdc7d2f9fa42e29b650e2879df42ea7d7833c1 (diff)
ip: convert tcp_sendmsg() to iov_iter primitives
patch is actually smaller than it seems to be - most of it is unindenting
the inner loop body in tcp_sendmsg() itself...

the bit in tcp_input.c is going to get reverted very soon - that's what
memcpy_from_msg() will become, but not in this commit; let's keep it
reasonably contained...

There's one potentially subtle change here: in case of short copy from
userland, mainline tcp_send_syn_data() discards the skb it has allocated
and falls back to normal path, where we'll send as much as possible after
rereading the same data again.  This patch trims SYN+data skb instead -
that way we don't need to copy from the same place twice.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--   include/net/sock.h      |  18
-rw-r--r--   net/ipv4/tcp.c          | 233
-rw-r--r--   net/ipv4/tcp_input.c    |   2
-rw-r--r--   net/ipv4/tcp_output.c   |  11
4 files changed, 123 insertions, 141 deletions
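
The conversion below hinges on a calling-convention change: the old user-pointer
helpers (copy_from_user(), csum_and_copy_from_user()) report success with 0 or an
error code, while the iov_iter primitives return the number of bytes actually
copied and advance the iterator as a side effect. A minimal sketch of how a caller
detects a short copy under the new convention -- pull_from_iter() is a hypothetical
illustration, only copy_from_iter() and iov_iter_count() from <linux/uio.h> are
real kernel APIs:

#include <linux/uio.h>
#include <linux/errno.h>

/* Hypothetical helper, not part of this patch: pull "len" bytes out of an
 * iov_iter into "buf".  A short return from copy_from_iter() means a fault
 * while walking the userland pages; on success the iterator has already
 * advanced by "len", so the caller does no offset bookkeeping of its own.
 */
static int pull_from_iter(void *buf, size_t len, struct iov_iter *from)
{
	if (iov_iter_count(from) < len)
		return -EINVAL;		/* asked for more than is there */
	if (copy_from_iter(buf, len, from) != len)
		return -EFAULT;		/* short copy: userland fault */
	return 0;
}
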
diff --git a/include/net/sock.h b/include/net/sock.h
index 15341499786c..1e45e599a3ab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 }
 
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
-					   char __user *from, char *to,
+					   struct iov_iter *from, char *to,
 					   int copy, int offset)
 {
 	if (skb->ip_summed == CHECKSUM_NONE) {
-		int err = 0;
-		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err);
-		if (err)
-			return err;
+		__wsum csum = 0;
+		if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
+			return -EFAULT;
 		skb->csum = csum_block_add(skb->csum, csum, offset);
 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
-		if (!access_ok(VERIFY_READ, from, copy) ||
-		    __copy_from_user_nocache(to, from, copy))
+		if (copy_from_iter_nocache(to, copy, from) != copy)
 			return -EFAULT;
-	} else if (copy_from_user(to, from, copy))
+	} else if (copy_from_iter(to, copy, from) != copy)
 		return -EFAULT;
 
 	return 0;
 }
 
 static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
-				       char __user *from, int copy)
+				       struct iov_iter *from, int copy)
 {
 	int err, offset = skb->len;
 
@@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
 	return err;
 }
 
-static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from,
+static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
 					   struct sk_buff *skb,
 					   struct page *page,
 					   int off, int copy)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3075723c729b..9d72a0fcd928 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	const struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int iovlen, flags, err, copied = 0;
-	int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
+	int flags, err, copied = 0;
+	int mss_now = 0, size_goal, copied_syn = 0;
 	bool sg;
 	long timeo;
 
@@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			goto out;
 		else if (err)
 			goto out_err;
-		offset = copied_syn;
 	}
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
 	/* Ok commence sending. */
-	iovlen = msg->msg_iter.nr_segs;
-	iov = msg->msg_iter.iov;
 	copied = 0;
 
 	err = -EPIPE;
@@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
-	while (--iovlen >= 0) {
-		size_t seglen = iov->iov_len;
-		unsigned char __user *from = iov->iov_base;
-
-		iov++;
-		if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */
-			if (offset >= seglen) {
-				offset -= seglen;
-				continue;
-			}
-			seglen -= offset;
-			from += offset;
-			offset = 0;
-		}
+	while (iov_iter_count(&msg->msg_iter)) {
+		int copy = 0;
+		int max = size_goal;
+
+		skb = tcp_write_queue_tail(sk);
+		if (tcp_send_head(sk)) {
+			if (skb->ip_summed == CHECKSUM_NONE)
+				max = mss_now;
+			copy = max - skb->len;
+		}
 
-		while (seglen > 0) {
-			int copy = 0;
-			int max = size_goal;
-
-			skb = tcp_write_queue_tail(sk);
-			if (tcp_send_head(sk)) {
-				if (skb->ip_summed == CHECKSUM_NONE)
-					max = mss_now;
-				copy = max - skb->len;
-			}
-
-			if (copy <= 0) {
+		if (copy <= 0) {
 new_segment:
-				/* Allocate new segment. If the interface is SG,
-				 * allocate skb fitting to single page.
-				 */
-				if (!sk_stream_memory_free(sk))
-					goto wait_for_sndbuf;
-
-				skb = sk_stream_alloc_skb(sk,
-							  select_size(sk, sg),
-							  sk->sk_allocation);
-				if (!skb)
-					goto wait_for_memory;
-
-				/*
-				 * Check whether we can use HW checksum.
-				 */
-				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
-					skb->ip_summed = CHECKSUM_PARTIAL;
-
-				skb_entail(sk, skb);
-				copy = size_goal;
-				max = size_goal;
-
-				/* All packets are restored as if they have
-				 * already been sent. skb_mstamp isn't set to
-				 * avoid wrong rtt estimation.
-				 */
-				if (tp->repair)
-					TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
-			}
+			/* Allocate new segment. If the interface is SG,
+			 * allocate skb fitting to single page.
+			 */
+			if (!sk_stream_memory_free(sk))
+				goto wait_for_sndbuf;
+
+			skb = sk_stream_alloc_skb(sk,
+						  select_size(sk, sg),
+						  sk->sk_allocation);
+			if (!skb)
+				goto wait_for_memory;
+
+			/*
+			 * Check whether we can use HW checksum.
+			 */
+			if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
+				skb->ip_summed = CHECKSUM_PARTIAL;
+
+			skb_entail(sk, skb);
+			copy = size_goal;
+			max = size_goal;
+
+			/* All packets are restored as if they have
+			 * already been sent. skb_mstamp isn't set to
+			 * avoid wrong rtt estimation.
+			 */
+			if (tp->repair)
+				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
+		}
 
-			/* Try to append data to the end of skb. */
-			if (copy > seglen)
-				copy = seglen;
-
-			/* Where to copy to? */
-			if (skb_availroom(skb) > 0) {
-				/* We have some space in skb head. Superb! */
-				copy = min_t(int, copy, skb_availroom(skb));
-				err = skb_add_data_nocache(sk, skb, from, copy);
-				if (err)
-					goto do_fault;
-			} else {
-				bool merge = true;
-				int i = skb_shinfo(skb)->nr_frags;
-				struct page_frag *pfrag = sk_page_frag(sk);
-
-				if (!sk_page_frag_refill(sk, pfrag))
-					goto wait_for_memory;
-
-				if (!skb_can_coalesce(skb, i, pfrag->page,
-						      pfrag->offset)) {
-					if (i == MAX_SKB_FRAGS || !sg) {
-						tcp_mark_push(tp, skb);
-						goto new_segment;
-					}
-					merge = false;
-				}
-
-				copy = min_t(int, copy, pfrag->size - pfrag->offset);
-
-				if (!sk_wmem_schedule(sk, copy))
-					goto wait_for_memory;
-
-				err = skb_copy_to_page_nocache(sk, from, skb,
-							       pfrag->page,
-							       pfrag->offset,
-							       copy);
-				if (err)
-					goto do_error;
-
-				/* Update the skb. */
-				if (merge) {
-					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
-				} else {
-					skb_fill_page_desc(skb, i, pfrag->page,
-							   pfrag->offset, copy);
-					get_page(pfrag->page);
-				}
-				pfrag->offset += copy;
-			}
+		/* Try to append data to the end of skb. */
+		if (copy > iov_iter_count(&msg->msg_iter))
+			copy = iov_iter_count(&msg->msg_iter);
+
+		/* Where to copy to? */
+		if (skb_availroom(skb) > 0) {
+			/* We have some space in skb head. Superb! */
+			copy = min_t(int, copy, skb_availroom(skb));
+			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
+			if (err)
+				goto do_fault;
+		} else {
+			bool merge = true;
+			int i = skb_shinfo(skb)->nr_frags;
+			struct page_frag *pfrag = sk_page_frag(sk);
+
+			if (!sk_page_frag_refill(sk, pfrag))
+				goto wait_for_memory;
+
+			if (!skb_can_coalesce(skb, i, pfrag->page,
+					      pfrag->offset)) {
+				if (i == MAX_SKB_FRAGS || !sg) {
+					tcp_mark_push(tp, skb);
+					goto new_segment;
+				}
+				merge = false;
+			}
+
+			copy = min_t(int, copy, pfrag->size - pfrag->offset);
+
+			if (!sk_wmem_schedule(sk, copy))
+				goto wait_for_memory;
+
+			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+						       pfrag->page,
+						       pfrag->offset,
+						       copy);
+			if (err)
+				goto do_error;
+
+			/* Update the skb. */
+			if (merge) {
+				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+			} else {
+				skb_fill_page_desc(skb, i, pfrag->page,
+						   pfrag->offset, copy);
+				get_page(pfrag->page);
+			}
+			pfrag->offset += copy;
+		}
 
-			if (!copied)
-				TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
-
-			tp->write_seq += copy;
-			TCP_SKB_CB(skb)->end_seq += copy;
-			tcp_skb_pcount_set(skb, 0);
-
-			from += copy;
-			copied += copy;
-			if ((seglen -= copy) == 0 && iovlen == 0) {
-				tcp_tx_timestamp(sk, skb);
-				goto out;
-			}
+		if (!copied)
+			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+
+		tp->write_seq += copy;
+		TCP_SKB_CB(skb)->end_seq += copy;
+		tcp_skb_pcount_set(skb, 0);
+
+		copied += copy;
+		if (!iov_iter_count(&msg->msg_iter)) {
+			tcp_tx_timestamp(sk, skb);
+			goto out;
+		}
 
-			if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
-				continue;
-
-			if (forced_push(tp)) {
-				tcp_mark_push(tp, skb);
-				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
-			} else if (skb == tcp_send_head(sk))
-				tcp_push_one(sk, mss_now);
-			continue;
+		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+			continue;
+
+		if (forced_push(tp)) {
+			tcp_mark_push(tp, skb);
+			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+		} else if (skb == tcp_send_head(sk))
+			tcp_push_one(sk, mss_now);
+		continue;
 
 wait_for_sndbuf:
-			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
-			if (copied)
-				tcp_push(sk, flags & ~MSG_MORE, mss_now,
-					 TCP_NAGLE_PUSH, size_goal);
-
-			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
-				goto do_error;
-
-			mss_now = tcp_send_mss(sk, &size_goal, flags);
-		}
+		if (copied)
+			tcp_push(sk, flags & ~MSG_MORE, mss_now,
+				 TCP_NAGLE_PUSH, size_goal);
+
+		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+			goto do_error;
+
+		mss_now = tcp_send_mss(sk, &size_goal, flags);
 	}
 
 out:
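
Most of the tcp.c hunk above is the "unindenting" the commit message mentions: the
old pair of loops (outer over iovec segments, inner over the bytes of one segment,
with manual from/seglen/offset bookkeeping) collapses into a single loop driven by
iov_iter_count(), since the iterator advances itself on every copy. A rough sketch
of the resulting shape, assuming a placeholder fill_one_skb() for the skb
allocate/append logic (it is not a kernel API):

#include <net/sock.h>
#include <linux/uio.h>
#include <linux/kernel.h>

/* Placeholder for the allocate-or-extend-tail-skb plus copy logic. */
static int fill_one_skb(struct sock *sk, struct iov_iter *from, size_t chunk);

static ssize_t send_loop_sketch(struct sock *sk, struct iov_iter *from,
				size_t size_goal)
{
	ssize_t copied = 0;

	while (iov_iter_count(from)) {
		size_t chunk = min_t(size_t, size_goal, iov_iter_count(from));
		int err = fill_one_skb(sk, from, chunk);

		if (err)
			return copied ? copied : err;
		/* no from += copy / seglen -= copy: the iterator moved itself */
		copied += chunk;
	}
	return copied;
}
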
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 71fb37c70581..93c74829cbce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
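
The tcp_input.c change above is the temporary bit the commit message flags: once
memcpy_from_msg() is itself converted to the iterator in a later commit, this
open-coded copy_from_iter() call can be reverted back to memcpy_from_msg(). The
eventual wrapper looks roughly like this (a sketch, not part of this patch):

#include <linux/socket.h>
#include <linux/uio.h>
#include <linux/errno.h>

/* Copy "len" bytes of the message payload into "data"; 0 or -EFAULT. */
static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
{
	return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
}
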
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20ab06b228ac..722c8bceaf9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, err = 0;
+	int syn_loss = 0, space, err = 0, copied;
 	unsigned long last_syn_loss = 0;
 	struct sk_buff *syn_data;
 
@@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto fallback;
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
-	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-					 fo->data->msg_iter.iov, 0, space))) {
+	copied = copy_from_iter(skb_put(syn_data, space), space,
+				&fo->data->msg_iter);
+	if (unlikely(!copied)) {
 		kfree_skb(syn_data);
 		goto fallback;
 	}
+	if (copied != space) {
+		skb_trim(syn_data, copied);
+		space = copied;
+	}
 
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
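
The tcp_output.c hunk is the "potentially subtle change" from the commit message.
On a short copy into the Fast Open SYN+data skb, the old code freed the skb and
fell back to the normal path, which re-read the same bytes from the iovec; with an
iov_iter the consumed bytes are gone, so the patch keeps the partial payload and
trims the skb instead. A simplified sketch of that pattern, assuming a freshly
allocated skb with no payload yet (copy_from_iter(), skb_put() and skb_trim() are
real kernel calls, the wrapper itself is illustrative):

#include <linux/skbuff.h>
#include <linux/uio.h>

/* Returns how much payload ended up in syn_data; 0 tells the caller to
 * free the skb and fall back to a plain SYN.
 */
static int copy_syn_payload(struct sk_buff *syn_data, struct iov_iter *from,
			    int space)
{
	int copied = copy_from_iter(skb_put(syn_data, space), space, from);

	if (copied && copied != space)
		skb_trim(syn_data, copied);	/* keep what we did get */
	return copied;
}
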