about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2015-02-04 23:46:55 -0500
committer David S. Miller <davem@davemloft.net> 2015-02-04 23:46:55 -0500
commit f2683b743f2334ef49a5361bf596dd1fbd2c9be4 (patch)
tree 7f53b2614742238e966ba8a815ef6c5079422ee2
parent 9878196578286c5ed494778ada01da094377a686 (diff)
parent 57dd8a0735aabff4862025cf64ad94da3d80e620 (diff)
Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
More iov_iter work from Al Viro.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- crypto/af_alg.c 40
-rw-r--r-- crypto/algif_hash.c 45
-rw-r--r-- crypto/algif_skcipher.c 74
-rw-r--r-- drivers/misc/vmw_vmci/vmci_queue_pair.c 16
-rw-r--r-- drivers/vhost/net.c 91
-rw-r--r-- drivers/vhost/scsi.c 2
-rw-r--r-- drivers/vhost/vhost.c 6
-rw-r--r-- fs/afs/rxrpc.c 14
-rw-r--r-- include/crypto/if_alg.h 3
-rw-r--r-- include/linux/skbuff.h 14
-rw-r--r-- include/linux/socket.h 7
-rw-r--r-- include/linux/uio.h 6
-rw-r--r-- include/linux/vmw_vmci_api.h 2
-rw-r--r-- include/net/ping.h 2
-rw-r--r-- include/net/sock.h 18
-rw-r--r-- include/net/udplite.h 3
-rw-r--r-- lib/Makefile 2
-rw-r--r-- lib/iovec.c 87
-rw-r--r-- net/core/Makefile 2
-rw-r--r-- net/core/iovec.c 137
-rw-r--r-- net/ipv4/ip_output.c 6
-rw-r--r-- net/ipv4/ping.c 17
-rw-r--r-- net/ipv4/raw.c 7
-rw-r--r-- net/ipv4/tcp.c 233
-rw-r--r-- net/ipv4/tcp_output.c 11
-rw-r--r-- net/ipv6/ping.c 3
-rw-r--r-- net/ipv6/raw.c 7
-rw-r--r-- net/netlink/af_netlink.c 5
-rw-r--r-- net/rxrpc/ar-output.c 46
-rw-r--r-- net/socket.c 76
-rw-r--r-- net/tipc/msg.c 7
-rw-r--r-- net/tipc/socket.c 14
-rw-r--r-- net/vmw_vsock/vmci_transport.c 3
33 files changed, 320 insertions, 686 deletions
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 4665b79c729a..eb78fe8a60c8 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -338,49 +338,31 @@ static const struct net_proto_family alg_family = {
338 .owner = THIS_MODULE, 338 .owner = THIS_MODULE,
339}; 339};
340 340
341int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, 341int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
342 int write)
343{ 342{
344 unsigned long from = (unsigned long)addr; 343 size_t off;
345 unsigned long npages; 344 ssize_t n;
346 unsigned off; 345 int npages, i;
347 int err;
348 int i;
349
350 err = -EFAULT;
351 if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len))
352 goto out;
353
354 off = from & ~PAGE_MASK;
355 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
356 if (npages > ALG_MAX_PAGES)
357 npages = ALG_MAX_PAGES;
358 346
359 err = get_user_pages_fast(from, npages, write, sgl->pages); 347 n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
360 if (err < 0) 348 if (n < 0)
361 goto out; 349 return n;
362 350
363 npages = err; 351 npages = PAGE_ALIGN(off + n);
364 err = -EINVAL;
365 if (WARN_ON(npages == 0)) 352 if (WARN_ON(npages == 0))
366 goto out; 353 return -EINVAL;
367
368 err = 0;
369 354
370 sg_init_table(sgl->sg, npages); 355 sg_init_table(sgl->sg, npages);
371 356
372 for (i = 0; i < npages; i++) { 357 for (i = 0, len = n; i < npages; i++) {
373 int plen = min_t(int, len, PAGE_SIZE - off); 358 int plen = min_t(int, len, PAGE_SIZE - off);
374 359
375 sg_set_page(sgl->sg + i, sgl->pages[i], plen, off); 360 sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
376 361
377 off = 0; 362 off = 0;
378 len -= plen; 363 len -= plen;
379 err += plen;
380 } 364 }
381 365 return n;
382out:
383 return err;
384} 366}
385EXPORT_SYMBOL_GPL(af_alg_make_sg); 367EXPORT_SYMBOL_GPL(af_alg_make_sg);
386 368
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 01f56eb7816e..01da360bdb55 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -41,8 +41,6 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
41 struct sock *sk = sock->sk; 41 struct sock *sk = sock->sk;
42 struct alg_sock *ask = alg_sk(sk); 42 struct alg_sock *ask = alg_sk(sk);
43 struct hash_ctx *ctx = ask->private; 43 struct hash_ctx *ctx = ask->private;
44 unsigned long iovlen;
45 const struct iovec *iov;
46 long copied = 0; 44 long copied = 0;
47 int err; 45 int err;
48 46
@@ -58,37 +56,28 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
58 56
59 ctx->more = 0; 57 ctx->more = 0;
60 58
61 for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; 59 while (iov_iter_count(&msg->msg_iter)) {
62 iovlen--, iov++) { 60 int len = iov_iter_count(&msg->msg_iter);
63 unsigned long seglen = iov->iov_len;
64 char __user *from = iov->iov_base;
65 61
66 while (seglen) { 62 if (len > limit)
67 int len = min_t(unsigned long, seglen, limit); 63 len = limit;
68 int newlen;
69 64
70 newlen = af_alg_make_sg(&ctx->sgl, from, len, 0); 65 len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
71 if (newlen < 0) { 66 if (len < 0) {
72 err = copied ? 0 : newlen; 67 err = copied ? 0 : len;
73 goto unlock; 68 goto unlock;
74 } 69 }
75
76 ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL,
77 newlen);
78
79 err = af_alg_wait_for_completion(
80 crypto_ahash_update(&ctx->req),
81 &ctx->completion);
82 70
83 af_alg_free_sg(&ctx->sgl); 71 ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len);
84 72
85 if (err) 73 err = af_alg_wait_for_completion(crypto_ahash_update(&ctx->req),
86 goto unlock; 74 &ctx->completion);
75 af_alg_free_sg(&ctx->sgl);
76 if (err)
77 goto unlock;
87 78
88 seglen -= newlen; 79 copied += len;
89 from += newlen; 80 iov_iter_advance(&msg->msg_iter, len);
90 copied += newlen;
91 }
92 } 81 }
93 82
94 err = 0; 83 err = 0;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c12207c8dde9..37110fd68adf 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -426,67 +426,59 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
426 &ctx->req)); 426 &ctx->req));
427 struct skcipher_sg_list *sgl; 427 struct skcipher_sg_list *sgl;
428 struct scatterlist *sg; 428 struct scatterlist *sg;
429 unsigned long iovlen;
430 const struct iovec *iov;
431 int err = -EAGAIN; 429 int err = -EAGAIN;
432 int used; 430 int used;
433 long copied = 0; 431 long copied = 0;
434 432
435 lock_sock(sk); 433 lock_sock(sk);
436 for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; 434 while (iov_iter_count(&msg->msg_iter)) {
437 iovlen--, iov++) { 435 sgl = list_first_entry(&ctx->tsgl,
438 unsigned long seglen = iov->iov_len; 436 struct skcipher_sg_list, list);
439 char __user *from = iov->iov_base; 437 sg = sgl->sg;
440
441 while (seglen) {
442 sgl = list_first_entry(&ctx->tsgl,
443 struct skcipher_sg_list, list);
444 sg = sgl->sg;
445
446 while (!sg->length)
447 sg++;
448
449 if (!ctx->used) {
450 err = skcipher_wait_for_data(sk, flags);
451 if (err)
452 goto unlock;
453 }
454 438
455 used = min_t(unsigned long, ctx->used, seglen); 439 while (!sg->length)
440 sg++;
456 441
457 used = af_alg_make_sg(&ctx->rsgl, from, used, 1); 442 used = ctx->used;
458 err = used; 443 if (!used) {
459 if (err < 0) 444 err = skcipher_wait_for_data(sk, flags);
445 if (err)
460 goto unlock; 446 goto unlock;
447 }
448
449 used = min_t(unsigned long, used, iov_iter_count(&msg->msg_iter));
450
451 used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
452 err = used;
453 if (err < 0)
454 goto unlock;
461 455
462 if (ctx->more || used < ctx->used) 456 if (ctx->more || used < ctx->used)
463 used -= used % bs; 457 used -= used % bs;
464 458
465 err = -EINVAL; 459 err = -EINVAL;
466 if (!used) 460 if (!used)
467 goto free; 461 goto free;
468 462
469 ablkcipher_request_set_crypt(&ctx->req, sg, 463 ablkcipher_request_set_crypt(&ctx->req, sg,
470 ctx->rsgl.sg, used, 464 ctx->rsgl.sg, used,
471 ctx->iv); 465 ctx->iv);
472 466
473 err = af_alg_wait_for_completion( 467 err = af_alg_wait_for_completion(
474 ctx->enc ? 468 ctx->enc ?
475 crypto_ablkcipher_encrypt(&ctx->req) : 469 crypto_ablkcipher_encrypt(&ctx->req) :
476 crypto_ablkcipher_decrypt(&ctx->req), 470 crypto_ablkcipher_decrypt(&ctx->req),
477 &ctx->completion); 471 &ctx->completion);
478 472
479free: 473free:
480 af_alg_free_sg(&ctx->rsgl); 474 af_alg_free_sg(&ctx->rsgl);
481 475
482 if (err) 476 if (err)
483 goto unlock; 477 goto unlock;
484 478
485 copied += used; 479 copied += used;
486 from += used; 480 skcipher_pull_sgl(sk, used);
487 seglen -= used; 481 iov_iter_advance(&msg->msg_iter, used);
488 skcipher_pull_sgl(sk, used);
489 }
490 } 482 }
491 483
492 err = 0; 484 err = 0;
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 7aaaf51e1596..35f19a683822 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -370,12 +370,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue,
370 to_copy = size - bytes_copied; 370 to_copy = size - bytes_copied;
371 371
372 if (is_iovec) { 372 if (is_iovec) {
373 struct iovec *iov = (struct iovec *)src; 373 struct msghdr *msg = (struct msghdr *)src;
374 int err; 374 int err;
375 375
376 /* The iovec will track bytes_copied internally. */ 376 /* The iovec will track bytes_copied internally. */
377 err = memcpy_fromiovec((u8 *)va + page_offset, 377 err = memcpy_from_msg((u8 *)va + page_offset,
378 iov, to_copy); 378 msg, to_copy);
379 if (err != 0) { 379 if (err != 0) {
380 if (kernel_if->host) 380 if (kernel_if->host)
381 kunmap(kernel_if->u.h.page[page_index]); 381 kunmap(kernel_if->u.h.page[page_index]);
@@ -580,7 +580,7 @@ static int qp_memcpy_from_queue(void *dest,
580 */ 580 */
581static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, 581static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
582 u64 queue_offset, 582 u64 queue_offset,
583 const void *src, 583 const void *msg,
584 size_t src_offset, size_t size) 584 size_t src_offset, size_t size)
585{ 585{
586 586
@@ -588,7 +588,7 @@ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
588 * We ignore src_offset because src is really a struct iovec * and will 588 * We ignore src_offset because src is really a struct iovec * and will
589 * maintain offset internally. 589 * maintain offset internally.
590 */ 590 */
591 return __qp_memcpy_to_queue(queue, queue_offset, src, size, true); 591 return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true);
592} 592}
593 593
594/* 594/*
@@ -3223,13 +3223,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_peek);
3223 * of bytes enqueued or < 0 on error. 3223 * of bytes enqueued or < 0 on error.
3224 */ 3224 */
3225ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, 3225ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3226 void *iov, 3226 struct msghdr *msg,
3227 size_t iov_size, 3227 size_t iov_size,
3228 int buf_type) 3228 int buf_type)
3229{ 3229{
3230 ssize_t result; 3230 ssize_t result;
3231 3231
3232 if (!qpair || !iov) 3232 if (!qpair)
3233 return VMCI_ERROR_INVALID_ARGS; 3233 return VMCI_ERROR_INVALID_ARGS;
3234 3234
3235 qp_lock(qpair); 3235 qp_lock(qpair);
@@ -3238,7 +3238,7 @@ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3238 result = qp_enqueue_locked(qpair->produce_q, 3238 result = qp_enqueue_locked(qpair->produce_q,
3239 qpair->consume_q, 3239 qpair->consume_q,
3240 qpair->produce_q_size, 3240 qpair->produce_q_size,
3241 iov, iov_size, 3241 msg, iov_size,
3242 qp_memcpy_to_queue_iov); 3242 qp_memcpy_to_queue_iov);
3243 3243
3244 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && 3244 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 6906f76332f4..e022cc40303d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -84,10 +84,6 @@ struct vhost_net_ubuf_ref {
84 84
85struct vhost_net_virtqueue { 85struct vhost_net_virtqueue {
86 struct vhost_virtqueue vq; 86 struct vhost_virtqueue vq;
87 /* hdr is used to store the virtio header.
88 * Since each iovec has >= 1 byte length, we never need more than
89 * header length entries to store the header. */
90 struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
91 size_t vhost_hlen; 87 size_t vhost_hlen;
92 size_t sock_hlen; 88 size_t sock_hlen;
93 /* vhost zerocopy support fields below: */ 89 /* vhost zerocopy support fields below: */
@@ -235,44 +231,6 @@ static bool vhost_sock_zcopy(struct socket *sock)
235 sock_flag(sock->sk, SOCK_ZEROCOPY); 231 sock_flag(sock->sk, SOCK_ZEROCOPY);
236} 232}
237 233
238/* Pop first len bytes from iovec. Return number of segments used. */
239static int move_iovec_hdr(struct iovec *from, struct iovec *to,
240 size_t len, int iov_count)
241{
242 int seg = 0;
243 size_t size;
244
245 while (len && seg < iov_count) {
246 size = min(from->iov_len, len);
247 to->iov_base = from->iov_base;
248 to->iov_len = size;
249 from->iov_len -= size;
250 from->iov_base += size;
251 len -= size;
252 ++from;
253 ++to;
254 ++seg;
255 }
256 return seg;
257}
258/* Copy iovec entries for len bytes from iovec. */
259static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
260 size_t len, int iovcount)
261{
262 int seg = 0;
263 size_t size;
264
265 while (len && seg < iovcount) {
266 size = min(from->iov_len, len);
267 to->iov_base = from->iov_base;
268 to->iov_len = size;
269 len -= size;
270 ++from;
271 ++to;
272 ++seg;
273 }
274}
275
276/* In case of DMA done not in order in lower device driver for some reason. 234/* In case of DMA done not in order in lower device driver for some reason.
277 * upend_idx is used to track end of used idx, done_idx is used to track head 235 * upend_idx is used to track end of used idx, done_idx is used to track head
278 * of used idx. Once lower device DMA done contiguously, we will signal KVM 236 * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -336,7 +294,7 @@ static void handle_tx(struct vhost_net *net)
336{ 294{
337 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 295 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
338 struct vhost_virtqueue *vq = &nvq->vq; 296 struct vhost_virtqueue *vq = &nvq->vq;
339 unsigned out, in, s; 297 unsigned out, in;
340 int head; 298 int head;
341 struct msghdr msg = { 299 struct msghdr msg = {
342 .msg_name = NULL, 300 .msg_name = NULL,
@@ -395,16 +353,17 @@ static void handle_tx(struct vhost_net *net)
395 break; 353 break;
396 } 354 }
397 /* Skip header. TODO: support TSO. */ 355 /* Skip header. TODO: support TSO. */
398 s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
399 len = iov_length(vq->iov, out); 356 len = iov_length(vq->iov, out);
400 iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len); 357 iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
358 iov_iter_advance(&msg.msg_iter, hdr_size);
401 /* Sanity check */ 359 /* Sanity check */
402 if (!len) { 360 if (!iov_iter_count(&msg.msg_iter)) {
403 vq_err(vq, "Unexpected header len for TX: " 361 vq_err(vq, "Unexpected header len for TX: "
404 "%zd expected %zd\n", 362 "%zd expected %zd\n",
405 iov_length(nvq->hdr, s), hdr_size); 363 len, hdr_size);
406 break; 364 break;
407 } 365 }
366 len = iov_iter_count(&msg.msg_iter);
408 367
409 zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN 368 zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
410 && (nvq->upend_idx + 1) % UIO_MAXIOV != 369 && (nvq->upend_idx + 1) % UIO_MAXIOV !=
@@ -569,9 +528,9 @@ static void handle_rx(struct vhost_net *net)
569 .msg_controllen = 0, 528 .msg_controllen = 0,
570 .msg_flags = MSG_DONTWAIT, 529 .msg_flags = MSG_DONTWAIT,
571 }; 530 };
572 struct virtio_net_hdr_mrg_rxbuf hdr = { 531 struct virtio_net_hdr hdr = {
573 .hdr.flags = 0, 532 .flags = 0,
574 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE 533 .gso_type = VIRTIO_NET_HDR_GSO_NONE
575 }; 534 };
576 size_t total_len = 0; 535 size_t total_len = 0;
577 int err, mergeable; 536 int err, mergeable;
@@ -579,6 +538,7 @@ static void handle_rx(struct vhost_net *net)
579 size_t vhost_hlen, sock_hlen; 538 size_t vhost_hlen, sock_hlen;
580 size_t vhost_len, sock_len; 539 size_t vhost_len, sock_len;
581 struct socket *sock; 540 struct socket *sock;
541 struct iov_iter fixup;
582 542
583 mutex_lock(&vq->mutex); 543 mutex_lock(&vq->mutex);
584 sock = vq->private_data; 544 sock = vq->private_data;
@@ -623,14 +583,19 @@ static void handle_rx(struct vhost_net *net)
623 break; 583 break;
624 } 584 }
625 /* We don't need to be notified again. */ 585 /* We don't need to be notified again. */
626 if (unlikely((vhost_hlen))) 586 iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
627 /* Skip header. TODO: support TSO. */ 587 fixup = msg.msg_iter;
628 move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); 588 if (unlikely((vhost_hlen))) {
629 else 589 /* We will supply the header ourselves
630 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: 590 * TODO: support TSO.
631 * needed because recvmsg can modify msg_iov. */ 591 */
632 copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); 592 iov_iter_advance(&msg.msg_iter, vhost_hlen);
633 iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len); 593 } else {
594 /* It'll come from socket; we'll need to patch
595 * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF
596 */
597 iov_iter_advance(&fixup, sizeof(hdr));
598 }
634 err = sock->ops->recvmsg(NULL, sock, &msg, 599 err = sock->ops->recvmsg(NULL, sock, &msg,
635 sock_len, MSG_DONTWAIT | MSG_TRUNC); 600 sock_len, MSG_DONTWAIT | MSG_TRUNC);
636 /* Userspace might have consumed the packet meanwhile: 601 /* Userspace might have consumed the packet meanwhile:
@@ -642,18 +607,18 @@ static void handle_rx(struct vhost_net *net)
642 vhost_discard_vq_desc(vq, headcount); 607 vhost_discard_vq_desc(vq, headcount);
643 continue; 608 continue;
644 } 609 }
610 /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
645 if (unlikely(vhost_hlen) && 611 if (unlikely(vhost_hlen) &&
646 memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, 612 copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) {
647 vhost_hlen)) {
648 vq_err(vq, "Unable to write vnet_hdr at addr %p\n", 613 vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
649 vq->iov->iov_base); 614 vq->iov->iov_base);
650 break; 615 break;
651 } 616 }
652 /* TODO: Should check and handle checksum. */ 617 /* Supply (or replace) ->num_buffers if VIRTIO_NET_F_MRG_RXBUF
618 * TODO: Should check and handle checksum.
619 */
653 if (likely(mergeable) && 620 if (likely(mergeable) &&
654 memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, 621 copy_to_iter(&headcount, 2, &fixup) != 2) {
655 offsetof(typeof(hdr), num_buffers),
656 sizeof hdr.num_buffers)) {
657 vq_err(vq, "Failed num_buffers write"); 622 vq_err(vq, "Failed num_buffers write");
658 vhost_discard_vq_desc(vq, headcount); 623 vhost_discard_vq_desc(vq, headcount);
659 break; 624 break;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index d695b1673ae5..dc78d87e0fc2 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1079,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1079 req_size, vq->iov[0].iov_len); 1079 req_size, vq->iov[0].iov_len);
1080 break; 1080 break;
1081 } 1081 }
1082 ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size); 1082 ret = copy_from_user(req, vq->iov[0].iov_base, req_size);
1083 if (unlikely(ret)) { 1083 if (unlikely(ret)) {
1084 vq_err(vq, "Faulted on virtio_scsi_cmd_req\n"); 1084 vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
1085 break; 1085 break;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index cb807d0ea498..2ee28266fd07 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1125,6 +1125,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
1125 struct vring_desc desc; 1125 struct vring_desc desc;
1126 unsigned int i = 0, count, found = 0; 1126 unsigned int i = 0, count, found = 0;
1127 u32 len = vhost32_to_cpu(vq, indirect->len); 1127 u32 len = vhost32_to_cpu(vq, indirect->len);
1128 struct iov_iter from;
1128 int ret; 1129 int ret;
1129 1130
1130 /* Sanity check */ 1131 /* Sanity check */
@@ -1142,6 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq,
1142 vq_err(vq, "Translation failure %d in indirect.\n", ret); 1143 vq_err(vq, "Translation failure %d in indirect.\n", ret);
1143 return ret; 1144 return ret;
1144 } 1145 }
1146 iov_iter_init(&from, READ, vq->indirect, ret, len);
1145 1147
1146 /* We will use the result as an address to read from, so most 1148 /* We will use the result as an address to read from, so most
1147 * architectures only need a compiler barrier here. */ 1149 * architectures only need a compiler barrier here. */
@@ -1164,8 +1166,8 @@ static int get_indirect(struct vhost_virtqueue *vq,
1164 i, count); 1166 i, count);
1165 return -EINVAL; 1167 return -EINVAL;
1166 } 1168 }
1167 if (unlikely(memcpy_fromiovec((unsigned char *)&desc, 1169 if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) !=
1168 vq->indirect, sizeof desc))) { 1170 sizeof(desc))) {
1169 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", 1171 vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
1170 i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); 1172 i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
1171 return -EINVAL; 1173 return -EINVAL;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 06e14bfb3496..dbc732e9a5c0 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
306 306
307 _debug("- range %u-%u%s", 307 _debug("- range %u-%u%s",
308 offset, to, msg->msg_flags ? " [more]" : ""); 308 offset, to, msg->msg_flags ? " [more]" : "");
309 iov_iter_init(&msg->msg_iter, WRITE, 309 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC,
310 (struct iovec *) iov, 1, to - offset); 310 iov, 1, to - offset);
311 311
312 /* have to change the state *before* sending the last 312 /* have to change the state *before* sending the last
313 * packet as RxRPC might give us the reply before it 313 * packet as RxRPC might give us the reply before it
@@ -384,7 +384,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
384 384
385 msg.msg_name = NULL; 385 msg.msg_name = NULL;
386 msg.msg_namelen = 0; 386 msg.msg_namelen = 0;
387 iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1, 387 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
388 call->request_size); 388 call->request_size);
389 msg.msg_control = NULL; 389 msg.msg_control = NULL;
390 msg.msg_controllen = 0; 390 msg.msg_controllen = 0;
@@ -770,7 +770,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
770void afs_send_empty_reply(struct afs_call *call) 770void afs_send_empty_reply(struct afs_call *call)
771{ 771{
772 struct msghdr msg; 772 struct msghdr msg;
773 struct iovec iov[1]; 773 struct kvec iov[1];
774 774
775 _enter(""); 775 _enter("");
776 776
@@ -778,7 +778,7 @@ void afs_send_empty_reply(struct afs_call *call)
778 iov[0].iov_len = 0; 778 iov[0].iov_len = 0;
779 msg.msg_name = NULL; 779 msg.msg_name = NULL;
780 msg.msg_namelen = 0; 780 msg.msg_namelen = 0;
781 iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0); /* WTF? */ 781 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */
782 msg.msg_control = NULL; 782 msg.msg_control = NULL;
783 msg.msg_controllen = 0; 783 msg.msg_controllen = 0;
784 msg.msg_flags = 0; 784 msg.msg_flags = 0;
@@ -805,7 +805,7 @@ void afs_send_empty_reply(struct afs_call *call)
805void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) 805void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
806{ 806{
807 struct msghdr msg; 807 struct msghdr msg;
808 struct iovec iov[1]; 808 struct kvec iov[1];
809 int n; 809 int n;
810 810
811 _enter(""); 811 _enter("");
@@ -814,7 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
814 iov[0].iov_len = len; 814 iov[0].iov_len = len;
815 msg.msg_name = NULL; 815 msg.msg_name = NULL;
816 msg.msg_namelen = 0; 816 msg.msg_namelen = 0;
817 iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len); 817 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
818 msg.msg_control = NULL; 818 msg.msg_control = NULL;
819 msg.msg_controllen = 0; 819 msg.msg_controllen = 0;
820 msg.msg_flags = 0; 820 msg.msg_flags = 0;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index cd62bf4289e9..88ea64e9a91c 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -67,8 +67,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
67int af_alg_release(struct socket *sock); 67int af_alg_release(struct socket *sock);
68int af_alg_accept(struct sock *sk, struct socket *newsock); 68int af_alg_accept(struct sock *sk, struct socket *newsock);
69 69
70int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, 70int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
71 int write);
72void af_alg_free_sg(struct af_alg_sgl *sgl); 71void af_alg_free_sg(struct af_alg_sgl *sgl);
73 72
74int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con); 73int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5405dfe02572..111e665455c3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2487,19 +2487,18 @@ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
2487} 2487}
2488 2488
2489static inline int skb_add_data(struct sk_buff *skb, 2489static inline int skb_add_data(struct sk_buff *skb,
2490 char __user *from, int copy) 2490 struct iov_iter *from, int copy)
2491{ 2491{
2492 const int off = skb->len; 2492 const int off = skb->len;
2493 2493
2494 if (skb->ip_summed == CHECKSUM_NONE) { 2494 if (skb->ip_summed == CHECKSUM_NONE) {
2495 int err = 0; 2495 __wsum csum = 0;
2496 __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), 2496 if (csum_and_copy_from_iter(skb_put(skb, copy), copy,
2497 copy, 0, &err); 2497 &csum, from) == copy) {
2498 if (!err) {
2499 skb->csum = csum_block_add(skb->csum, csum, off); 2498 skb->csum = csum_block_add(skb->csum, csum, off);
2500 return 0; 2499 return 0;
2501 } 2500 }
2502 } else if (!copy_from_user(skb_put(skb, copy), from, copy)) 2501 } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy)
2503 return 0; 2502 return 0;
2504 2503
2505 __skb_trim(skb, off); 2504 __skb_trim(skb, off);
@@ -2696,8 +2695,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
2696 2695
2697static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) 2696static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
2698{ 2697{
2699 /* XXX: stripping const */ 2698 return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
2700 return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len);
2701} 2699}
2702 2700
2703static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) 2701static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6e49a14365dc..5c19cba34dce 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -318,13 +318,6 @@ struct ucred {
318/* IPX options */ 318/* IPX options */
319#define IPX_TYPE 1 319#define IPX_TYPE 1
320 320
321extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
322 struct iovec *iov,
323 int offset,
324 unsigned int len, __wsum *csump);
325extern unsigned long iov_pages(const struct iovec *iov, int offset,
326 unsigned long nr_segs);
327
328extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); 321extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
329extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); 322extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
330 323
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 1c5e453f7ea9..3e0cb4ea3905 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -135,10 +135,4 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
135size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); 135size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
136size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); 136size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
137 137
138int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
139int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
140 int offset, int len);
141int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
142 int offset, int len);
143
144#endif 138#endif
diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h
index 5691f752ce8f..63df3a2a8ce5 100644
--- a/include/linux/vmw_vmci_api.h
+++ b/include/linux/vmw_vmci_api.h
@@ -74,7 +74,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
74ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, 74ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size,
75 int mode); 75 int mode);
76ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, 76ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
77 void *iov, size_t iov_size, int mode); 77 struct msghdr *msg, size_t iov_size, int mode);
78ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, 78ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
79 struct msghdr *msg, size_t iov_size, int mode); 79 struct msghdr *msg, size_t iov_size, int mode);
80ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, 80ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size,
diff --git a/include/net/ping.h b/include/net/ping.h
index f074060bc5de..cc16d413f681 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -59,7 +59,7 @@ extern struct pingv6_ops pingv6_ops;
59 59
60struct pingfakehdr { 60struct pingfakehdr {
61 struct icmphdr icmph; 61 struct icmphdr icmph;
62 struct iovec *iov; 62 struct msghdr *msg;
63 sa_family_t family; 63 sa_family_t family;
64 __wsum wcheck; 64 __wsum wcheck;
65}; 65};
diff --git a/include/net/sock.h b/include/net/sock.h
index 511ef7c8889b..d28b8fededd6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
1803} 1803}
1804 1804
1805static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, 1805static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
1806 char __user *from, char *to, 1806 struct iov_iter *from, char *to,
1807 int copy, int offset) 1807 int copy, int offset)
1808{ 1808{
1809 if (skb->ip_summed == CHECKSUM_NONE) { 1809 if (skb->ip_summed == CHECKSUM_NONE) {
1810 int err = 0; 1810 __wsum csum = 0;
1811 __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); 1811 if (csum_and_copy_from_iter(to, copy, &csum, from) != copy)
1812 if (err) 1812 return -EFAULT;
1813 return err;
1814 skb->csum = csum_block_add(skb->csum, csum, offset); 1813 skb->csum = csum_block_add(skb->csum, csum, offset);
1815 } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { 1814 } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) {
1816 if (!access_ok(VERIFY_READ, from, copy) || 1815 if (copy_from_iter_nocache(to, copy, from) != copy)
1817 __copy_from_user_nocache(to, from, copy))
1818 return -EFAULT; 1816 return -EFAULT;
1819 } else if (copy_from_user(to, from, copy)) 1817 } else if (copy_from_iter(to, copy, from) != copy)
1820 return -EFAULT; 1818 return -EFAULT;
1821 1819
1822 return 0; 1820 return 0;
1823} 1821}
1824 1822
1825static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, 1823static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
1826 char __user *from, int copy) 1824 struct iov_iter *from, int copy)
1827{ 1825{
1828 int err, offset = skb->len; 1826 int err, offset = skb->len;
1829 1827
@@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb,
1835 return err; 1833 return err;
1836} 1834}
1837 1835
1838static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, 1836static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from,
1839 struct sk_buff *skb, 1837 struct sk_buff *skb,
1840 struct page *page, 1838 struct page *page,
1841 int off, int copy) 1839 int off, int copy)
diff --git a/include/net/udplite.h b/include/net/udplite.h
index ae7c8d1fbcad..80761938b9a7 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -20,8 +20,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
20 int len, int odd, struct sk_buff *skb) 20 int len, int odd, struct sk_buff *skb)
21{ 21{
22 struct msghdr *msg = from; 22 struct msghdr *msg = from;
23 /* XXX: stripping const */ 23 return copy_from_iter(to, len, &msg->msg_iter) != len ? -EFAULT : 0;
24 return memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len);
25} 24}
26 25
27/* Designate sk as UDP-Lite socket */ 26/* Designate sk as UDP-Lite socket */
diff --git a/lib/Makefile b/lib/Makefile
index a8cf98d14199..7db78934ec07 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,7 +24,7 @@ obj-y += lockref.o
24 24
25obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ 25obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
26 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ 26 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
27 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ 27 gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
28 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ 28 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
29 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o 29 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
30obj-y += string_helpers.o 30obj-y += string_helpers.o
diff --git a/lib/iovec.c b/lib/iovec.c
deleted file mode 100644
index 2d99cb4a5006..000000000000
--- a/lib/iovec.c
+++ /dev/null
@@ -1,87 +0,0 @@
1#include <linux/uaccess.h>
2#include <linux/export.h>
3#include <linux/uio.h>
4
5/*
6 * Copy iovec to kernel. Returns -EFAULT on error.
7 *
8 * Note: this modifies the original iovec.
9 */
10
11int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
12{
13 while (len > 0) {
14 if (iov->iov_len) {
15 int copy = min_t(unsigned int, len, iov->iov_len);
16 if (copy_from_user(kdata, iov->iov_base, copy))
17 return -EFAULT;
18 len -= copy;
19 kdata += copy;
20 iov->iov_base += copy;
21 iov->iov_len -= copy;
22 }
23 iov++;
24 }
25
26 return 0;
27}
28EXPORT_SYMBOL(memcpy_fromiovec);
29
30/*
31 * Copy kernel to iovec. Returns -EFAULT on error.
32 */
33
34int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
35 int offset, int len)
36{
37 int copy;
38 for (; len > 0; ++iov) {
39 /* Skip over the finished iovecs */
40 if (unlikely(offset >= iov->iov_len)) {
41 offset -= iov->iov_len;
42 continue;
43 }
44 copy = min_t(unsigned int, iov->iov_len - offset, len);
45 if (copy_to_user(iov->iov_base + offset, kdata, copy))
46 return -EFAULT;
47 offset = 0;
48 kdata += copy;
49 len -= copy;
50 }
51
52 return 0;
53}
54EXPORT_SYMBOL(memcpy_toiovecend);
55
56/*
57 * Copy iovec to kernel. Returns -EFAULT on error.
58 */
59
60int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
61 int offset, int len)
62{
63 /* No data? Done! */
64 if (len == 0)
65 return 0;
66
67 /* Skip over the finished iovecs */
68 while (offset >= iov->iov_len) {
69 offset -= iov->iov_len;
70 iov++;
71 }
72
73 while (len > 0) {
74 u8 __user *base = iov->iov_base + offset;
75 int copy = min_t(unsigned int, len, iov->iov_len - offset);
76
77 offset = 0;
78 if (copy_from_user(kdata, base, copy))
79 return -EFAULT;
80 len -= copy;
81 kdata += copy;
82 iov++;
83 }
84
85 return 0;
86}
87EXPORT_SYMBOL(memcpy_fromiovecend);
diff --git a/net/core/Makefile b/net/core/Makefile
index 235e6c50708d..fec0856dd6c0 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the Linux networking core. 2# Makefile for the Linux networking core.
3# 3#
4 4
5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ 5obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o 6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
diff --git a/net/core/iovec.c b/net/core/iovec.c
deleted file mode 100644
index dcbe98b3726a..000000000000
--- a/net/core/iovec.c
+++ /dev/null
@@ -1,137 +0,0 @@
1/*
2 * iovec manipulation routines.
3 *
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 * Fixes:
11 * Andrew Lunn : Errors in iovec copying.
12 * Pedro Roque : Added memcpy_fromiovecend and
13 * csum_..._fromiovecend.
14 * Andi Kleen : fixed error handling for 2.1
15 * Alexey Kuznetsov: 2.1 optimisations
16 * Andi Kleen : Fix csum*fromiovecend for IPv6.
17 */
18
19#include <linux/errno.h>
20#include <linux/module.h>
21#include <linux/kernel.h>
22#include <linux/mm.h>
23#include <linux/net.h>
24#include <linux/in6.h>
25#include <asm/uaccess.h>
26#include <asm/byteorder.h>
27#include <net/checksum.h>
28#include <net/sock.h>
29
30/*
31 * And now for the all-in-one: copy and checksum from a user iovec
32 * directly to a datagram
33 * Calls to csum_partial but the last must be in 32 bit chunks
34 *
35 * ip_build_xmit must ensure that when fragmenting only the last
36 * call to this function will be unaligned also.
37 */
38int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
39 int offset, unsigned int len, __wsum *csump)
40{
41 __wsum csum = *csump;
42 int partial_cnt = 0, err = 0;
43
44 /* Skip over the finished iovecs */
45 while (offset >= iov->iov_len) {
46 offset -= iov->iov_len;
47 iov++;
48 }
49
50 while (len > 0) {
51 u8 __user *base = iov->iov_base + offset;
52 int copy = min_t(unsigned int, len, iov->iov_len - offset);
53
54 offset = 0;
55
56 /* There is a remnant from previous iov. */
57 if (partial_cnt) {
58 int par_len = 4 - partial_cnt;
59
60 /* iov component is too short ... */
61 if (par_len > copy) {
62 if (copy_from_user(kdata, base, copy))
63 goto out_fault;
64 kdata += copy;
65 base += copy;
66 partial_cnt += copy;
67 len -= copy;
68 iov++;
69 if (len)
70 continue;
71 *csump = csum_partial(kdata - partial_cnt,
72 partial_cnt, csum);
73 goto out;
74 }
75 if (copy_from_user(kdata, base, par_len))
76 goto out_fault;
77 csum = csum_partial(kdata - partial_cnt, 4, csum);
78 kdata += par_len;
79 base += par_len;
80 copy -= par_len;
81 len -= par_len;
82 partial_cnt = 0;
83 }
84
85 if (len > copy) {
86 partial_cnt = copy % 4;
87 if (partial_cnt) {
88 copy -= partial_cnt;
89 if (copy_from_user(kdata + copy, base + copy,
90 partial_cnt))
91 goto out_fault;
92 }
93 }
94
95 if (copy) {
96 csum = csum_and_copy_from_user(base, kdata, copy,
97 csum, &err);
98 if (err)
99 goto out;
100 }
101 len -= copy + partial_cnt;
102 kdata += copy + partial_cnt;
103 iov++;
104 }
105 *csump = csum;
106out:
107 return err;
108
109out_fault:
110 err = -EFAULT;
111 goto out;
112}
113EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
114
115unsigned long iov_pages(const struct iovec *iov, int offset,
116 unsigned long nr_segs)
117{
118 unsigned long seg, base;
119 int pages = 0, len, size;
120
121 while (nr_segs && (offset >= iov->iov_len)) {
122 offset -= iov->iov_len;
123 ++iov;
124 --nr_segs;
125 }
126
127 for (seg = 0; seg < nr_segs; seg++) {
128 base = (unsigned long)iov[seg].iov_base + offset;
129 len = iov[seg].iov_len - offset;
130 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
131 pages += size;
132 offset = 0;
133 }
134
135 return pages;
136}
137EXPORT_SYMBOL(iov_pages);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b50861b22b6b..f998bc87ae38 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
755 struct msghdr *msg = from; 755 struct msghdr *msg = from;
756 756
757 if (skb->ip_summed == CHECKSUM_PARTIAL) { 757 if (skb->ip_summed == CHECKSUM_PARTIAL) {
758 /* XXX: stripping const */ 758 if (copy_from_iter(to, len, &msg->msg_iter) != len)
759 if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0)
760 return -EFAULT; 759 return -EFAULT;
761 } else { 760 } else {
762 __wsum csum = 0; 761 __wsum csum = 0;
763 /* XXX: stripping const */ 762 if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len)
764 if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0)
765 return -EFAULT; 763 return -EFAULT;
766 skb->csum = csum_block_add(skb->csum, csum, odd); 764 skb->csum = csum_block_add(skb->csum, csum, odd);
767 } 765 }
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2a3720fb5a5f..e9f66e1cda50 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to,
599 struct pingfakehdr *pfh = (struct pingfakehdr *)from; 599 struct pingfakehdr *pfh = (struct pingfakehdr *)from;
600 600
601 if (offset == 0) { 601 if (offset == 0) {
602 if (fraglen < sizeof(struct icmphdr)) 602 fraglen -= sizeof(struct icmphdr);
603 if (fraglen < 0)
603 BUG(); 604 BUG();
604 if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), 605 if (csum_and_copy_from_iter(to + sizeof(struct icmphdr),
605 pfh->iov, 0, fraglen - sizeof(struct icmphdr), 606 fraglen, &pfh->wcheck,
606 &pfh->wcheck)) 607 &pfh->msg->msg_iter) != fraglen)
607 return -EFAULT; 608 return -EFAULT;
608 } else if (offset < sizeof(struct icmphdr)) { 609 } else if (offset < sizeof(struct icmphdr)) {
609 BUG(); 610 BUG();
610 } else { 611 } else {
611 if (csum_partial_copy_fromiovecend 612 if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck,
612 (to, pfh->iov, offset - sizeof(struct icmphdr), 613 &pfh->msg->msg_iter) != fraglen)
613 fraglen, &pfh->wcheck))
614 return -EFAULT; 614 return -EFAULT;
615 } 615 }
616 616
@@ -811,8 +811,7 @@ back_from_confirm:
811 pfh.icmph.checksum = 0; 811 pfh.icmph.checksum = 0;
812 pfh.icmph.un.echo.id = inet->inet_sport; 812 pfh.icmph.un.echo.id = inet->inet_sport;
813 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; 813 pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
814 /* XXX: stripping const */ 814 pfh.msg = msg;
815 pfh.iov = (struct iovec *)msg->msg_iter.iov;
816 pfh.wcheck = 0; 815 pfh.wcheck = 0;
817 pfh.family = AF_INET; 816 pfh.family = AF_INET;
818 817
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0bb68df5055d..f027a708b7e0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -337,7 +337,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
337} 337}
338 338
339static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, 339static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
340 void *from, size_t length, 340 struct msghdr *msg, size_t length,
341 struct rtable **rtp, 341 struct rtable **rtp,
342 unsigned int flags) 342 unsigned int flags)
343{ 343{
@@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
382 382
383 skb->transport_header = skb->network_header; 383 skb->transport_header = skb->network_header;
384 err = -EFAULT; 384 err = -EFAULT;
385 if (memcpy_fromiovecend((void *)iph, from, 0, length)) 385 if (memcpy_from_msg(iph, msg, length))
386 goto error_free; 386 goto error_free;
387 387
388 iphlen = iph->ihl * 4; 388 iphlen = iph->ihl * 4;
@@ -625,8 +625,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
625back_from_confirm: 625back_from_confirm:
626 626
627 if (inet->hdrincl) 627 if (inet->hdrincl)
628 /* XXX: stripping const */ 628 err = raw_send_hdrinc(sk, &fl4, msg, len,
629 err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len,
630 &rt, msg->msg_flags); 629 &rt, msg->msg_flags);
631 630
632 else { 631 else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3075723c729b..9d72a0fcd928 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1067int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1067int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1068 size_t size) 1068 size_t size)
1069{ 1069{
1070 const struct iovec *iov;
1071 struct tcp_sock *tp = tcp_sk(sk); 1070 struct tcp_sock *tp = tcp_sk(sk);
1072 struct sk_buff *skb; 1071 struct sk_buff *skb;
1073 int iovlen, flags, err, copied = 0; 1072 int flags, err, copied = 0;
1074 int mss_now = 0, size_goal, copied_syn = 0, offset = 0; 1073 int mss_now = 0, size_goal, copied_syn = 0;
1075 bool sg; 1074 bool sg;
1076 long timeo; 1075 long timeo;
1077 1076
@@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1084 goto out; 1083 goto out;
1085 else if (err) 1084 else if (err)
1086 goto out_err; 1085 goto out_err;
1087 offset = copied_syn;
1088 } 1086 }
1089 1087
1090 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 1088 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1118 mss_now = tcp_send_mss(sk, &size_goal, flags); 1116 mss_now = tcp_send_mss(sk, &size_goal, flags);
1119 1117
1120 /* Ok commence sending. */ 1118 /* Ok commence sending. */
1121 iovlen = msg->msg_iter.nr_segs;
1122 iov = msg->msg_iter.iov;
1123 copied = 0; 1119 copied = 0;
1124 1120
1125 err = -EPIPE; 1121 err = -EPIPE;
@@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1128 1124
1129 sg = !!(sk->sk_route_caps & NETIF_F_SG); 1125 sg = !!(sk->sk_route_caps & NETIF_F_SG);
1130 1126
1131 while (--iovlen >= 0) { 1127 while (iov_iter_count(&msg->msg_iter)) {
1132 size_t seglen = iov->iov_len; 1128 int copy = 0;
1133 unsigned char __user *from = iov->iov_base; 1129 int max = size_goal;
1134 1130
1135 iov++; 1131 skb = tcp_write_queue_tail(sk);
1136 if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ 1132 if (tcp_send_head(sk)) {
1137 if (offset >= seglen) { 1133 if (skb->ip_summed == CHECKSUM_NONE)
1138 offset -= seglen; 1134 max = mss_now;
1139 continue; 1135 copy = max - skb->len;
1140 }
1141 seglen -= offset;
1142 from += offset;
1143 offset = 0;
1144 } 1136 }
1145 1137
1146 while (seglen > 0) { 1138 if (copy <= 0) {
1147 int copy = 0;
1148 int max = size_goal;
1149
1150 skb = tcp_write_queue_tail(sk);
1151 if (tcp_send_head(sk)) {
1152 if (skb->ip_summed == CHECKSUM_NONE)
1153 max = mss_now;
1154 copy = max - skb->len;
1155 }
1156
1157 if (copy <= 0) {
1158new_segment: 1139new_segment:
1159 /* Allocate new segment. If the interface is SG, 1140 /* Allocate new segment. If the interface is SG,
1160 * allocate skb fitting to single page. 1141 * allocate skb fitting to single page.
1161 */ 1142 */
1162 if (!sk_stream_memory_free(sk)) 1143 if (!sk_stream_memory_free(sk))
1163 goto wait_for_sndbuf; 1144 goto wait_for_sndbuf;
1164 1145
1165 skb = sk_stream_alloc_skb(sk, 1146 skb = sk_stream_alloc_skb(sk,
1166 select_size(sk, sg), 1147 select_size(sk, sg),
1167 sk->sk_allocation); 1148 sk->sk_allocation);
1168 if (!skb) 1149 if (!skb)
1169 goto wait_for_memory; 1150 goto wait_for_memory;
1170 1151
1171 /* 1152 /*
1172 * Check whether we can use HW checksum. 1153 * Check whether we can use HW checksum.
1173 */ 1154 */
1174 if (sk->sk_route_caps & NETIF_F_ALL_CSUM) 1155 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
1175 skb->ip_summed = CHECKSUM_PARTIAL; 1156 skb->ip_summed = CHECKSUM_PARTIAL;
1176 1157
1177 skb_entail(sk, skb); 1158 skb_entail(sk, skb);
1178 copy = size_goal; 1159 copy = size_goal;
1179 max = size_goal; 1160 max = size_goal;
1180 1161
1181 /* All packets are restored as if they have 1162 /* All packets are restored as if they have
1182 * already been sent. skb_mstamp isn't set to 1163 * already been sent. skb_mstamp isn't set to
1183 * avoid wrong rtt estimation. 1164 * avoid wrong rtt estimation.
1184 */ 1165 */
1185 if (tp->repair) 1166 if (tp->repair)
1186 TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; 1167 TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
1187 } 1168 }
1188 1169
1189 /* Try to append data to the end of skb. */ 1170 /* Try to append data to the end of skb. */
1190 if (copy > seglen) 1171 if (copy > iov_iter_count(&msg->msg_iter))
1191 copy = seglen; 1172 copy = iov_iter_count(&msg->msg_iter);
1192 1173
1193 /* Where to copy to? */ 1174 /* Where to copy to? */
1194 if (skb_availroom(skb) > 0) { 1175 if (skb_availroom(skb) > 0) {
1195 /* We have some space in skb head. Superb! */ 1176 /* We have some space in skb head. Superb! */
1196 copy = min_t(int, copy, skb_availroom(skb)); 1177 copy = min_t(int, copy, skb_availroom(skb));
1197 err = skb_add_data_nocache(sk, skb, from, copy); 1178 err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
1198 if (err) 1179 if (err)
1199 goto do_fault; 1180 goto do_fault;
1200 } else { 1181 } else {
1201 bool merge = true; 1182 bool merge = true;
1202 int i = skb_shinfo(skb)->nr_frags; 1183 int i = skb_shinfo(skb)->nr_frags;
1203 struct page_frag *pfrag = sk_page_frag(sk); 1184 struct page_frag *pfrag = sk_page_frag(sk);
1204 1185
1205 if (!sk_page_frag_refill(sk, pfrag)) 1186 if (!sk_page_frag_refill(sk, pfrag))
1206 goto wait_for_memory; 1187 goto wait_for_memory;
1207
1208 if (!skb_can_coalesce(skb, i, pfrag->page,
1209 pfrag->offset)) {
1210 if (i == MAX_SKB_FRAGS || !sg) {
1211 tcp_mark_push(tp, skb);
1212 goto new_segment;
1213 }
1214 merge = false;
1215 }
1216 1188
1217 copy = min_t(int, copy, pfrag->size - pfrag->offset); 1189 if (!skb_can_coalesce(skb, i, pfrag->page,
1218 1190 pfrag->offset)) {
1219 if (!sk_wmem_schedule(sk, copy)) 1191 if (i == MAX_SKB_FRAGS || !sg) {
1220 goto wait_for_memory; 1192 tcp_mark_push(tp, skb);
1221 1193 goto new_segment;
1222 err = skb_copy_to_page_nocache(sk, from, skb,
1223 pfrag->page,
1224 pfrag->offset,
1225 copy);
1226 if (err)
1227 goto do_error;
1228
1229 /* Update the skb. */
1230 if (merge) {
1231 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1232 } else {
1233 skb_fill_page_desc(skb, i, pfrag->page,
1234 pfrag->offset, copy);
1235 get_page(pfrag->page);
1236 } 1194 }
1237 pfrag->offset += copy; 1195 merge = false;
1238 } 1196 }
1239 1197
1240 if (!copied) 1198 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1241 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1242 1199
1243 tp->write_seq += copy; 1200 if (!sk_wmem_schedule(sk, copy))
1244 TCP_SKB_CB(skb)->end_seq += copy; 1201 goto wait_for_memory;
1245 tcp_skb_pcount_set(skb, 0);
1246 1202
1247 from += copy; 1203 err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
1248 copied += copy; 1204 pfrag->page,
1249 if ((seglen -= copy) == 0 && iovlen == 0) { 1205 pfrag->offset,
1250 tcp_tx_timestamp(sk, skb); 1206 copy);
1251 goto out; 1207 if (err)
1208 goto do_error;
1209
1210 /* Update the skb. */
1211 if (merge) {
1212 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1213 } else {
1214 skb_fill_page_desc(skb, i, pfrag->page,
1215 pfrag->offset, copy);
1216 get_page(pfrag->page);
1252 } 1217 }
1218 pfrag->offset += copy;
1219 }
1253 1220
1254 if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) 1221 if (!copied)
1255 continue; 1222 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1223
1224 tp->write_seq += copy;
1225 TCP_SKB_CB(skb)->end_seq += copy;
1226 tcp_skb_pcount_set(skb, 0);
1227
1228 copied += copy;
1229 if (!iov_iter_count(&msg->msg_iter)) {
1230 tcp_tx_timestamp(sk, skb);
1231 goto out;
1232 }
1256 1233
1257 if (forced_push(tp)) { 1234 if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
1258 tcp_mark_push(tp, skb);
1259 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
1260 } else if (skb == tcp_send_head(sk))
1261 tcp_push_one(sk, mss_now);
1262 continue; 1235 continue;
1263 1236
1237 if (forced_push(tp)) {
1238 tcp_mark_push(tp, skb);
1239 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
1240 } else if (skb == tcp_send_head(sk))
1241 tcp_push_one(sk, mss_now);
1242 continue;
1243
1264wait_for_sndbuf: 1244wait_for_sndbuf:
1265 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1245 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1266wait_for_memory: 1246wait_for_memory:
1267 if (copied) 1247 if (copied)
1268 tcp_push(sk, flags & ~MSG_MORE, mss_now, 1248 tcp_push(sk, flags & ~MSG_MORE, mss_now,
1269 TCP_NAGLE_PUSH, size_goal); 1249 TCP_NAGLE_PUSH, size_goal);
1270 1250
1271 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1251 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1272 goto do_error; 1252 goto do_error;
1273 1253
1274 mss_now = tcp_send_mss(sk, &size_goal, flags); 1254 mss_now = tcp_send_mss(sk, &size_goal, flags);
1275 }
1276 } 1255 }
1277 1256
1278out: 1257out:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1b326ed46f7b..4fcc9a768849 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3055{ 3055{
3056 struct tcp_sock *tp = tcp_sk(sk); 3056 struct tcp_sock *tp = tcp_sk(sk);
3057 struct tcp_fastopen_request *fo = tp->fastopen_req; 3057 struct tcp_fastopen_request *fo = tp->fastopen_req;
3058 int syn_loss = 0, space, err = 0; 3058 int syn_loss = 0, space, err = 0, copied;
3059 unsigned long last_syn_loss = 0; 3059 unsigned long last_syn_loss = 0;
3060 struct sk_buff *syn_data; 3060 struct sk_buff *syn_data;
3061 3061
@@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
3093 goto fallback; 3093 goto fallback;
3094 syn_data->ip_summed = CHECKSUM_PARTIAL; 3094 syn_data->ip_summed = CHECKSUM_PARTIAL;
3095 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); 3095 memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
3096 if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), 3096 copied = copy_from_iter(skb_put(syn_data, space), space,
3097 fo->data->msg_iter.iov, 0, space))) { 3097 &fo->data->msg_iter);
3098 if (unlikely(!copied)) {
3098 kfree_skb(syn_data); 3099 kfree_skb(syn_data);
3099 goto fallback; 3100 goto fallback;
3100 } 3101 }
3102 if (copied != space) {
3103 skb_trim(syn_data, copied);
3104 space = copied;
3105 }
3101 3106
3102 /* No more data pending in inet_wait_for_connect() */ 3107 /* No more data pending in inet_wait_for_connect() */
3103 if (space == fo->size) 3108 if (space == fo->size)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 2d3148378a1f..bd46f736f61d 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
163 pfh.icmph.checksum = 0; 163 pfh.icmph.checksum = 0;
164 pfh.icmph.un.echo.id = inet->inet_sport; 164 pfh.icmph.un.echo.id = inet->inet_sport;
165 pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; 165 pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
166 /* XXX: stripping const */ 166 pfh.msg = msg;
167 pfh.iov = (struct iovec *)msg->msg_iter.iov;
168 pfh.wcheck = 0; 167 pfh.wcheck = 0;
169 pfh.family = AF_INET6; 168 pfh.family = AF_INET6;
170 169
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ee25631f8c29..dae7f1a1e464 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -609,7 +609,7 @@ out:
609 return err; 609 return err;
610} 610}
611 611
612static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, 612static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
613 struct flowi6 *fl6, struct dst_entry **dstp, 613 struct flowi6 *fl6, struct dst_entry **dstp,
614 unsigned int flags) 614 unsigned int flags)
615{ 615{
@@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
648 skb->ip_summed = CHECKSUM_NONE; 648 skb->ip_summed = CHECKSUM_NONE;
649 649
650 skb->transport_header = skb->network_header; 650 skb->transport_header = skb->network_header;
651 err = memcpy_fromiovecend((void *)iph, from, 0, length); 651 err = memcpy_from_msg(iph, msg, length);
652 if (err) 652 if (err)
653 goto error_fault; 653 goto error_fault;
654 654
@@ -886,8 +886,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
886 886
887back_from_confirm: 887back_from_confirm:
888 if (inet->hdrincl) 888 if (inet->hdrincl)
889 /* XXX: stripping const */ 889 err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
890 err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags);
891 else { 890 else {
892 lock_sock(sk); 891 lock_sock(sk);
893 err = ip6_append_data(sk, raw6_getfrag, &rfv, 892 err = ip6_append_data(sk, raw6_getfrag, &rfv,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 155854802d44..6feb16d5e1b8 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2298,7 +2298,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2298 goto out; 2298 goto out;
2299 } 2299 }
2300 2300
2301 /* It's a really convoluted way for userland to ask for mmaped
2302 * sendmsg(), but that's what we've got...
2303 */
2301 if (netlink_tx_is_mmaped(sk) && 2304 if (netlink_tx_is_mmaped(sk) &&
2305 msg->msg_iter.type == ITER_IOVEC &&
2306 msg->msg_iter.nr_segs == 1 &&
2302 msg->msg_iter.iov->iov_base == NULL) { 2307 msg->msg_iter.iov->iov_base == NULL) {
2303 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, 2308 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2304 &scm); 2309 &scm);
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index e1a9373e5979..8331c95e1522 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -232,10 +232,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
232 call->state != RXRPC_CALL_SERVER_SEND_REPLY) { 232 call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
233 ret = -EPROTO; /* request phase complete for this client call */ 233 ret = -EPROTO; /* request phase complete for this client call */
234 } else { 234 } else {
235 mm_segment_t oldfs = get_fs();
236 set_fs(KERNEL_DS);
237 ret = rxrpc_send_data(NULL, call->socket, call, msg, len); 235 ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
238 set_fs(oldfs);
239 } 236 }
240 237
241 release_sock(&call->socket->sk); 238 release_sock(&call->socket->sk);
@@ -529,13 +526,11 @@ static int rxrpc_send_data(struct kiocb *iocb,
529 struct msghdr *msg, size_t len) 526 struct msghdr *msg, size_t len)
530{ 527{
531 struct rxrpc_skb_priv *sp; 528 struct rxrpc_skb_priv *sp;
532 unsigned char __user *from;
533 struct sk_buff *skb; 529 struct sk_buff *skb;
534 const struct iovec *iov;
535 struct sock *sk = &rx->sk; 530 struct sock *sk = &rx->sk;
536 long timeo; 531 long timeo;
537 bool more; 532 bool more;
538 int ret, ioc, segment, copied; 533 int ret, copied;
539 534
540 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 535 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
541 536
@@ -545,25 +540,17 @@ static int rxrpc_send_data(struct kiocb *iocb,
545 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 540 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
546 return -EPIPE; 541 return -EPIPE;
547 542
548 iov = msg->msg_iter.iov;
549 ioc = msg->msg_iter.nr_segs - 1;
550 from = iov->iov_base;
551 segment = iov->iov_len;
552 iov++;
553 more = msg->msg_flags & MSG_MORE; 543 more = msg->msg_flags & MSG_MORE;
554 544
555 skb = call->tx_pending; 545 skb = call->tx_pending;
556 call->tx_pending = NULL; 546 call->tx_pending = NULL;
557 547
558 copied = 0; 548 copied = 0;
559 do { 549 if (len > iov_iter_count(&msg->msg_iter))
550 len = iov_iter_count(&msg->msg_iter);
551 while (len) {
560 int copy; 552 int copy;
561 553
562 if (segment > len)
563 segment = len;
564
565 _debug("SEGMENT %d @%p", segment, from);
566
567 if (!skb) { 554 if (!skb) {
568 size_t size, chunk, max, space; 555 size_t size, chunk, max, space;
569 556
@@ -631,13 +618,13 @@ static int rxrpc_send_data(struct kiocb *iocb,
631 /* append next segment of data to the current buffer */ 618 /* append next segment of data to the current buffer */
632 copy = skb_tailroom(skb); 619 copy = skb_tailroom(skb);
633 ASSERTCMP(copy, >, 0); 620 ASSERTCMP(copy, >, 0);
634 if (copy > segment) 621 if (copy > len)
635 copy = segment; 622 copy = len;
636 if (copy > sp->remain) 623 if (copy > sp->remain)
637 copy = sp->remain; 624 copy = sp->remain;
638 625
639 _debug("add"); 626 _debug("add");
640 ret = skb_add_data(skb, from, copy); 627 ret = skb_add_data(skb, &msg->msg_iter, copy);
641 _debug("added"); 628 _debug("added");
642 if (ret < 0) 629 if (ret < 0)
643 goto efault; 630 goto efault;
@@ -646,18 +633,6 @@ static int rxrpc_send_data(struct kiocb *iocb,
646 copied += copy; 633 copied += copy;
647 634
648 len -= copy; 635 len -= copy;
649 segment -= copy;
650 from += copy;
651 while (segment == 0 && ioc > 0) {
652 from = iov->iov_base;
653 segment = iov->iov_len;
654 iov++;
655 ioc--;
656 }
657 if (len == 0) {
658 segment = 0;
659 ioc = 0;
660 }
661 636
662 /* check for the far side aborting the call or a network error 637 /* check for the far side aborting the call or a network error
663 * occurring */ 638 * occurring */
@@ -665,7 +640,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
665 goto call_aborted; 640 goto call_aborted;
666 641
667 /* add the packet to the send queue if it's now full */ 642 /* add the packet to the send queue if it's now full */
668 if (sp->remain <= 0 || (segment == 0 && !more)) { 643 if (sp->remain <= 0 || (!len && !more)) {
669 struct rxrpc_connection *conn = call->conn; 644 struct rxrpc_connection *conn = call->conn;
670 uint32_t seq; 645 uint32_t seq;
671 size_t pad; 646 size_t pad;
@@ -711,11 +686,10 @@ static int rxrpc_send_data(struct kiocb *iocb,
711 686
712 memcpy(skb->head, &sp->hdr, 687 memcpy(skb->head, &sp->hdr,
713 sizeof(struct rxrpc_header)); 688 sizeof(struct rxrpc_header));
714 rxrpc_queue_packet(call, skb, segment == 0 && !more); 689 rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more);
715 skb = NULL; 690 skb = NULL;
716 } 691 }
717 692 }
718 } while (segment > 0);
719 693
720success: 694success:
721 ret = copied; 695 ret = copied;
diff --git a/net/socket.c b/net/socket.c
index 3326d67482ac..bbedbfcb42c2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -113,10 +113,8 @@ unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly; 113unsigned int sysctl_net_busy_poll __read_mostly;
114#endif 114#endif
115 115
116static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117 unsigned long nr_segs, loff_t pos); 117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
118static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
119 unsigned long nr_segs, loff_t pos);
120static int sock_mmap(struct file *file, struct vm_area_struct *vma); 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
121 119
122static int sock_close(struct inode *inode, struct file *file); 120static int sock_close(struct inode *inode, struct file *file);
@@ -142,8 +140,10 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
142static const struct file_operations socket_file_ops = { 140static const struct file_operations socket_file_ops = {
143 .owner = THIS_MODULE, 141 .owner = THIS_MODULE,
144 .llseek = no_llseek, 142 .llseek = no_llseek,
145 .aio_read = sock_aio_read, 143 .read = new_sync_read,
146 .aio_write = sock_aio_write, 144 .write = new_sync_write,
145 .read_iter = sock_read_iter,
146 .write_iter = sock_write_iter,
147 .poll = sock_poll, 147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl, 148 .unlocked_ioctl = sock_ioctl,
149#ifdef CONFIG_COMPAT 149#ifdef CONFIG_COMPAT
@@ -845,63 +845,47 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
845 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 845 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
846} 846}
847 847
848static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 848static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
849 struct file *file, const struct iovec *iov,
850 unsigned long nr_segs)
851{ 849{
850 struct file *file = iocb->ki_filp;
852 struct socket *sock = file->private_data; 851 struct socket *sock = file->private_data;
852 struct msghdr msg = {.msg_iter = *to};
853 ssize_t res;
853 854
854 msg->msg_name = NULL; 855 if (file->f_flags & O_NONBLOCK)
855 msg->msg_namelen = 0; 856 msg.msg_flags = MSG_DONTWAIT;
856 msg->msg_control = NULL;
857 msg->msg_controllen = 0;
858 iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, iocb->ki_nbytes);
859 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
860 857
861 return __sock_recvmsg(iocb, sock, msg, iocb->ki_nbytes, msg->msg_flags); 858 if (iocb->ki_pos != 0)
862}
863
864static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
865 unsigned long nr_segs, loff_t pos)
866{
867 struct msghdr msg;
868
869 if (pos != 0)
870 return -ESPIPE; 859 return -ESPIPE;
871 860
872 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ 861 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
873 return 0; 862 return 0;
874 863
875 return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); 864 res = __sock_recvmsg(iocb, sock, &msg,
865 iocb->ki_nbytes, msg.msg_flags);
866 *to = msg.msg_iter;
867 return res;
876} 868}
877 869
878static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 870static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
879 struct file *file, const struct iovec *iov,
880 unsigned long nr_segs)
881{ 871{
872 struct file *file = iocb->ki_filp;
882 struct socket *sock = file->private_data; 873 struct socket *sock = file->private_data;
874 struct msghdr msg = {.msg_iter = *from};
875 ssize_t res;
883 876
884 msg->msg_name = NULL; 877 if (iocb->ki_pos != 0)
885 msg->msg_namelen = 0; 878 return -ESPIPE;
886 msg->msg_control = NULL;
887 msg->msg_controllen = 0;
888 iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes);
889 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
890 if (sock->type == SOCK_SEQPACKET)
891 msg->msg_flags |= MSG_EOR;
892
893 return __sock_sendmsg(iocb, sock, msg, iocb->ki_nbytes);
894}
895 879
896static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 880 if (file->f_flags & O_NONBLOCK)
897 unsigned long nr_segs, loff_t pos) 881 msg.msg_flags = MSG_DONTWAIT;
898{
899 struct msghdr msg;
900 882
901 if (pos != 0) 883 if (sock->type == SOCK_SEQPACKET)
902 return -ESPIPE; 884 msg.msg_flags |= MSG_EOR;
903 885
904 return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); 886 res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
887 *from = msg.msg_iter;
888 return res;
905} 889}
906 890
907/* 891/*
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 18aba9e99345..da67c8d3edc6 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -189,7 +189,6 @@ err:
189 * tipc_msg_build - create buffer chain containing specified header and data 189 * tipc_msg_build - create buffer chain containing specified header and data
190 * @mhdr: Message header, to be prepended to data 190 * @mhdr: Message header, to be prepended to data
191 * @m: User message 191 * @m: User message
192 * @offset: Posision in iov to start copying from
193 * @dsz: Total length of user data 192 * @dsz: Total length of user data
194 * @pktmax: Max packet size that can be used 193 * @pktmax: Max packet size that can be used
195 * @list: Buffer or chain of buffers to be returned to caller 194 * @list: Buffer or chain of buffers to be returned to caller
@@ -221,8 +220,7 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
221 __skb_queue_tail(list, skb); 220 __skb_queue_tail(list, skb);
222 skb_copy_to_linear_data(skb, mhdr, mhsz); 221 skb_copy_to_linear_data(skb, mhdr, mhsz);
223 pktpos = skb->data + mhsz; 222 pktpos = skb->data + mhsz;
224 if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, 223 if (copy_from_iter(pktpos, dsz, &m->msg_iter) == dsz)
225 dsz))
226 return dsz; 224 return dsz;
227 rc = -EFAULT; 225 rc = -EFAULT;
228 goto error; 226 goto error;
@@ -252,12 +250,11 @@ int tipc_msg_build(struct net *net, struct tipc_msg *mhdr, struct msghdr *m,
252 if (drem < pktrem) 250 if (drem < pktrem)
253 pktrem = drem; 251 pktrem = drem;
254 252
255 if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) { 253 if (copy_from_iter(pktpos, pktrem, &m->msg_iter) != pktrem) {
256 rc = -EFAULT; 254 rc = -EFAULT;
257 goto error; 255 goto error;
258 } 256 }
259 drem -= pktrem; 257 drem -= pktrem;
260 offset += pktrem;
261 258
262 if (!drem) 259 if (!drem)
263 break; 260 break;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 679a22082fcb..caa4d663fd90 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -733,6 +733,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
733 struct net *net = sock_net(sk); 733 struct net *net = sock_net(sk);
734 struct tipc_msg *mhdr = &tipc_sk(sk)->phdr; 734 struct tipc_msg *mhdr = &tipc_sk(sk)->phdr;
735 struct sk_buff_head head; 735 struct sk_buff_head head;
736 struct iov_iter save = msg->msg_iter;
736 uint mtu; 737 uint mtu;
737 int rc; 738 int rc;
738 739
@@ -758,8 +759,10 @@ new_mtu:
758 rc = dsz; 759 rc = dsz;
759 break; 760 break;
760 } 761 }
761 if (rc == -EMSGSIZE) 762 if (rc == -EMSGSIZE) {
763 msg->msg_iter = save;
762 goto new_mtu; 764 goto new_mtu;
765 }
763 if (rc != -ELINKCONG) 766 if (rc != -ELINKCONG)
764 break; 767 break;
765 tipc_sk(sk)->link_cong = 1; 768 tipc_sk(sk)->link_cong = 1;
@@ -895,6 +898,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
895 struct sk_buff_head head; 898 struct sk_buff_head head;
896 struct sk_buff *skb; 899 struct sk_buff *skb;
897 struct tipc_name_seq *seq = &dest->addr.nameseq; 900 struct tipc_name_seq *seq = &dest->addr.nameseq;
901 struct iov_iter save;
898 u32 mtu; 902 u32 mtu;
899 long timeo; 903 long timeo;
900 int rc; 904 int rc;
@@ -963,6 +967,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
963 msg_set_hdr_sz(mhdr, BASIC_H_SIZE); 967 msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
964 } 968 }
965 969
970 save = m->msg_iter;
966new_mtu: 971new_mtu:
967 mtu = tipc_node_get_mtu(net, dnode, tsk->portid); 972 mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
968 __skb_queue_head_init(&head); 973 __skb_queue_head_init(&head);
@@ -980,8 +985,10 @@ new_mtu:
980 rc = dsz; 985 rc = dsz;
981 break; 986 break;
982 } 987 }
983 if (rc == -EMSGSIZE) 988 if (rc == -EMSGSIZE) {
989 m->msg_iter = save;
984 goto new_mtu; 990 goto new_mtu;
991 }
985 if (rc != -ELINKCONG) 992 if (rc != -ELINKCONG)
986 break; 993 break;
987 tsk->link_cong = 1; 994 tsk->link_cong = 1;
@@ -1052,6 +1059,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
1052 long timeo; 1059 long timeo;
1053 u32 dnode; 1060 u32 dnode;
1054 uint mtu, send, sent = 0; 1061 uint mtu, send, sent = 0;
1062 struct iov_iter save;
1055 1063
1056 /* Handle implied connection establishment */ 1064 /* Handle implied connection establishment */
1057 if (unlikely(dest)) { 1065 if (unlikely(dest)) {
@@ -1078,6 +1086,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
1078 dnode = tsk_peer_node(tsk); 1086 dnode = tsk_peer_node(tsk);
1079 1087
1080next: 1088next:
1089 save = m->msg_iter;
1081 mtu = tsk->max_pkt; 1090 mtu = tsk->max_pkt;
1082 send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); 1091 send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
1083 __skb_queue_head_init(&head); 1092 __skb_queue_head_init(&head);
@@ -1097,6 +1106,7 @@ next:
1097 if (rc == -EMSGSIZE) { 1106 if (rc == -EMSGSIZE) {
1098 tsk->max_pkt = tipc_node_get_mtu(net, dnode, 1107 tsk->max_pkt = tipc_node_get_mtu(net, dnode,
1099 portid); 1108 portid);
1109 m->msg_iter = save;
1100 goto next; 1110 goto next;
1101 } 1111 }
1102 if (rc != -ELINKCONG) 1112 if (rc != -ELINKCONG)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 02d2e5229240..7f3255084a6c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue(
1850 struct msghdr *msg, 1850 struct msghdr *msg,
1851 size_t len) 1851 size_t len)
1852{ 1852{
1853 /* XXX: stripping const */ 1853 return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
1854 return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0);
1855} 1854}
1856 1855
1857static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) 1856static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)