diff options
Diffstat (limited to 'fs/read_write.c')
-rw-r--r-- | fs/read_write.c | 253 |
1 files changed, 160 insertions, 93 deletions
diff --git a/fs/read_write.c b/fs/read_write.c
index d4cb3183c99c..f792000a28e6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,13 +15,15 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
18 | #include "read_write.h" | ||
18 | 19 | ||
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
20 | #include <asm/unistd.h> | 21 | #include <asm/unistd.h> |
21 | 22 | ||
22 | const struct file_operations generic_ro_fops = { | 23 | const struct file_operations generic_ro_fops = { |
23 | .llseek = generic_file_llseek, | 24 | .llseek = generic_file_llseek, |
24 | .read = generic_file_read, | 25 | .read = do_sync_read, |
26 | .aio_read = generic_file_aio_read, | ||
25 | .mmap = generic_file_readonly_mmap, | 27 | .mmap = generic_file_readonly_mmap, |
26 | .sendfile = generic_file_sendfile, | 28 | .sendfile = generic_file_sendfile, |
27 | }; | 29 | }; |
@@ -227,14 +229,20 @@ static void wait_on_retry_sync_kiocb(struct kiocb *iocb) | |||
227 | 229 | ||
228 | ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) | 230 | ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) |
229 | { | 231 | { |
232 | struct iovec iov = { .iov_base = buf, .iov_len = len }; | ||
230 | struct kiocb kiocb; | 233 | struct kiocb kiocb; |
231 | ssize_t ret; | 234 | ssize_t ret; |
232 | 235 | ||
233 | init_sync_kiocb(&kiocb, filp); | 236 | init_sync_kiocb(&kiocb, filp); |
234 | kiocb.ki_pos = *ppos; | 237 | kiocb.ki_pos = *ppos; |
235 | while (-EIOCBRETRY == | 238 | kiocb.ki_left = len; |
236 | (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) | 239 | |
240 | for (;;) { | ||
241 | ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); | ||
242 | if (ret != -EIOCBRETRY) | ||
243 | break; | ||
237 | wait_on_retry_sync_kiocb(&kiocb); | 244 | wait_on_retry_sync_kiocb(&kiocb); |
245 | } | ||
238 | 246 | ||
239 | if (-EIOCBQUEUED == ret) | 247 | if (-EIOCBQUEUED == ret) |
240 | ret = wait_on_sync_kiocb(&kiocb); | 248 | ret = wait_on_sync_kiocb(&kiocb); |
@@ -279,14 +287,20 @@ EXPORT_SYMBOL(vfs_read); | |||
279 | 287 | ||
280 | ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) | 288 | ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) |
281 | { | 289 | { |
290 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; | ||
282 | struct kiocb kiocb; | 291 | struct kiocb kiocb; |
283 | ssize_t ret; | 292 | ssize_t ret; |
284 | 293 | ||
285 | init_sync_kiocb(&kiocb, filp); | 294 | init_sync_kiocb(&kiocb, filp); |
286 | kiocb.ki_pos = *ppos; | 295 | kiocb.ki_pos = *ppos; |
287 | while (-EIOCBRETRY == | 296 | kiocb.ki_left = len; |
288 | (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) | 297 | |
298 | for (;;) { | ||
299 | ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); | ||
300 | if (ret != -EIOCBRETRY) | ||
301 | break; | ||
289 | wait_on_retry_sync_kiocb(&kiocb); | 302 | wait_on_retry_sync_kiocb(&kiocb); |
303 | } | ||
290 | 304 | ||
291 | if (-EIOCBQUEUED == ret) | 305 | if (-EIOCBQUEUED == ret) |
292 | ret = wait_on_sync_kiocb(&kiocb); | 306 | ret = wait_on_sync_kiocb(&kiocb); |
@@ -438,78 +452,155 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
438 | 452 | ||
439 | EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ | 453 | EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ |
440 | 454 | ||
455 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | ||
456 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | ||
457 | { | ||
458 | struct kiocb kiocb; | ||
459 | ssize_t ret; | ||
460 | |||
461 | init_sync_kiocb(&kiocb, filp); | ||
462 | kiocb.ki_pos = *ppos; | ||
463 | kiocb.ki_left = len; | ||
464 | kiocb.ki_nbytes = len; | ||
465 | |||
466 | for (;;) { | ||
467 | ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); | ||
468 | if (ret != -EIOCBRETRY) | ||
469 | break; | ||
470 | wait_on_retry_sync_kiocb(&kiocb); | ||
471 | } | ||
472 | |||
473 | if (ret == -EIOCBQUEUED) | ||
474 | ret = wait_on_sync_kiocb(&kiocb); | ||
475 | *ppos = kiocb.ki_pos; | ||
476 | return ret; | ||
477 | } | ||
478 | |||
479 | /* Do it by hand, with file-ops */ | ||
480 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | ||
481 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) | ||
482 | { | ||
483 | struct iovec *vector = iov; | ||
484 | ssize_t ret = 0; | ||
485 | |||
486 | while (nr_segs > 0) { | ||
487 | void __user *base; | ||
488 | size_t len; | ||
489 | ssize_t nr; | ||
490 | |||
491 | base = vector->iov_base; | ||
492 | len = vector->iov_len; | ||
493 | vector++; | ||
494 | nr_segs--; | ||
495 | |||
496 | nr = fn(filp, base, len, ppos); | ||
497 | |||
498 | if (nr < 0) { | ||
499 | if (!ret) | ||
500 | ret = nr; | ||
501 | break; | ||
502 | } | ||
503 | ret += nr; | ||
504 | if (nr != len) | ||
505 | break; | ||
506 | } | ||
507 | |||
508 | return ret; | ||
509 | } | ||
510 | |||
441 | /* A write operation does a read from user space and vice versa */ | 511 | /* A write operation does a read from user space and vice versa */ |
442 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) | 512 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) |
443 | 513 | ||
514 | ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, | ||
515 | unsigned long nr_segs, unsigned long fast_segs, | ||
516 | struct iovec *fast_pointer, | ||
517 | struct iovec **ret_pointer) | ||
518 | { | ||
519 | unsigned long seg; | ||
520 | ssize_t ret; | ||
521 | struct iovec *iov = fast_pointer; | ||
522 | |||
523 | /* | ||
524 | * SuS says "The readv() function *may* fail if the iovcnt argument | ||
525 | * was less than or equal to 0, or greater than {IOV_MAX}." Linux has | ||
526 | * traditionally returned zero for zero segments, so... | ||
527 | */ | ||
528 | if (nr_segs == 0) { | ||
529 | ret = 0; | ||
530 | goto out; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * First get the "struct iovec" from user memory and | ||
535 | * verify all the pointers | ||
536 | */ | ||
537 | if (nr_segs > UIO_MAXIOV) { | ||
538 | ret = -EINVAL; | ||
539 | goto out; | ||
540 | } | ||
541 | if (nr_segs > fast_segs) { | ||
542 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
543 | if (iov == NULL) { | ||
544 | ret = -ENOMEM; | ||
545 | goto out; | ||
546 | } | ||
547 | } | ||
548 | if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { | ||
549 | ret = -EFAULT; | ||
550 | goto out; | ||
551 | } | ||
552 | |||
553 | /* | ||
554 | * According to the Single Unix Specification we should return EINVAL | ||
555 | * if an element length is < 0 when cast to ssize_t or if the | ||
556 | * total length would overflow the ssize_t return value of the | ||
557 | * system call. | ||
558 | */ | ||
559 | ret = 0; | ||
560 | for (seg = 0; seg < nr_segs; seg++) { | ||
561 | void __user *buf = iov[seg].iov_base; | ||
562 | ssize_t len = (ssize_t)iov[seg].iov_len; | ||
563 | |||
564 | /* see if we're about to use an invalid len or if | ||
565 | * it's about to overflow ssize_t */ | ||
566 | if (len < 0 || (ret + len < ret)) { | ||
567 | ret = -EINVAL; | ||
568 | goto out; | ||
569 | } | ||
570 | if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { | ||
571 | ret = -EFAULT; | ||
572 | goto out; | ||
573 | } | ||
574 | |||
575 | ret += len; | ||
576 | } | ||
577 | out: | ||
578 | *ret_pointer = iov; | ||
579 | return ret; | ||
580 | } | ||
581 | |||
444 | static ssize_t do_readv_writev(int type, struct file *file, | 582 | static ssize_t do_readv_writev(int type, struct file *file, |
445 | const struct iovec __user * uvector, | 583 | const struct iovec __user * uvector, |
446 | unsigned long nr_segs, loff_t *pos) | 584 | unsigned long nr_segs, loff_t *pos) |
447 | { | 585 | { |
448 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); | ||
449 | typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); | ||
450 | |||
451 | size_t tot_len; | 586 | size_t tot_len; |
452 | struct iovec iovstack[UIO_FASTIOV]; | 587 | struct iovec iovstack[UIO_FASTIOV]; |
453 | struct iovec *iov=iovstack, *vector; | 588 | struct iovec *iov = iovstack; |
454 | ssize_t ret; | 589 | ssize_t ret; |
455 | int seg; | ||
456 | io_fn_t fn; | 590 | io_fn_t fn; |
457 | iov_fn_t fnv; | 591 | iov_fn_t fnv; |
458 | 592 | ||
459 | /* | 593 | if (!file->f_op) { |
460 | * SuS says "The readv() function *may* fail if the iovcnt argument | 594 | ret = -EINVAL; |
461 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has | ||
462 | * traditionally returned zero for zero segments, so... | ||
463 | */ | ||
464 | ret = 0; | ||
465 | if (nr_segs == 0) | ||
466 | goto out; | 595 | goto out; |
596 | } | ||
467 | 597 | ||
468 | /* | 598 | ret = rw_copy_check_uvector(type, uvector, nr_segs, |
469 | * First get the "struct iovec" from user memory and | 599 | ARRAY_SIZE(iovstack), iovstack, &iov); |
470 | * verify all the pointers | 600 | if (ret <= 0) |
471 | */ | ||
472 | ret = -EINVAL; | ||
473 | if (nr_segs > UIO_MAXIOV) | ||
474 | goto out; | ||
475 | if (!file->f_op) | ||
476 | goto out; | ||
477 | if (nr_segs > UIO_FASTIOV) { | ||
478 | ret = -ENOMEM; | ||
479 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
480 | if (!iov) | ||
481 | goto out; | ||
482 | } | ||
483 | ret = -EFAULT; | ||
484 | if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) | ||
485 | goto out; | 601 | goto out; |
486 | 602 | ||
487 | /* | 603 | tot_len = ret; |
488 | * Single unix specification: | ||
489 | * We should -EINVAL if an element length is not >= 0 and fitting an | ||
490 | * ssize_t. The total length is fitting an ssize_t | ||
491 | * | ||
492 | * Be careful here because iov_len is a size_t not an ssize_t | ||
493 | */ | ||
494 | tot_len = 0; | ||
495 | ret = -EINVAL; | ||
496 | for (seg = 0; seg < nr_segs; seg++) { | ||
497 | void __user *buf = iov[seg].iov_base; | ||
498 | ssize_t len = (ssize_t)iov[seg].iov_len; | ||
499 | |||
500 | if (len < 0) /* size_t not fitting an ssize_t .. */ | ||
501 | goto out; | ||
502 | if (unlikely(!access_ok(vrfy_dir(type), buf, len))) | ||
503 | goto Efault; | ||
504 | tot_len += len; | ||
505 | if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ | ||
506 | goto out; | ||
507 | } | ||
508 | if (tot_len == 0) { | ||
509 | ret = 0; | ||
510 | goto out; | ||
511 | } | ||
512 | |||
513 | ret = rw_verify_area(type, file, pos, tot_len); | 604 | ret = rw_verify_area(type, file, pos, tot_len); |
514 | if (ret < 0) | 605 | if (ret < 0) |
515 | goto out; | 606 | goto out; |
@@ -520,39 +611,18 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
520 | fnv = NULL; | 611 | fnv = NULL; |
521 | if (type == READ) { | 612 | if (type == READ) { |
522 | fn = file->f_op->read; | 613 | fn = file->f_op->read; |
523 | fnv = file->f_op->readv; | 614 | fnv = file->f_op->aio_read; |
524 | } else { | 615 | } else { |
525 | fn = (io_fn_t)file->f_op->write; | 616 | fn = (io_fn_t)file->f_op->write; |
526 | fnv = file->f_op->writev; | 617 | fnv = file->f_op->aio_write; |
527 | } | 618 | } |
528 | if (fnv) { | ||
529 | ret = fnv(file, iov, nr_segs, pos); | ||
530 | goto out; | ||
531 | } | ||
532 | |||
533 | /* Do it by hand, with file-ops */ | ||
534 | ret = 0; | ||
535 | vector = iov; | ||
536 | while (nr_segs > 0) { | ||
537 | void __user * base; | ||
538 | size_t len; | ||
539 | ssize_t nr; | ||
540 | 619 | ||
541 | base = vector->iov_base; | 620 | if (fnv) |
542 | len = vector->iov_len; | 621 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, |
543 | vector++; | 622 | pos, fnv); |
544 | nr_segs--; | 623 | else |
545 | 624 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | |
546 | nr = fn(file, base, len, pos); | ||
547 | 625 | ||
548 | if (nr < 0) { | ||
549 | if (!ret) ret = nr; | ||
550 | break; | ||
551 | } | ||
552 | ret += nr; | ||
553 | if (nr != len) | ||
554 | break; | ||
555 | } | ||
556 | out: | 626 | out: |
557 | if (iov != iovstack) | 627 | if (iov != iovstack) |
558 | kfree(iov); | 628 | kfree(iov); |
@@ -563,9 +633,6 @@ out: | |||
563 | fsnotify_modify(file->f_dentry); | 633 | fsnotify_modify(file->f_dentry); |
564 | } | 634 | } |
565 | return ret; | 635 | return ret; |
566 | Efault: | ||
567 | ret = -EFAULT; | ||
568 | goto out; | ||
569 | } | 636 | } |
570 | 637 | ||
571 | ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, | 638 | ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, |
@@ -573,7 +640,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, | |||
573 | { | 640 | { |
574 | if (!(file->f_mode & FMODE_READ)) | 641 | if (!(file->f_mode & FMODE_READ)) |
575 | return -EBADF; | 642 | return -EBADF; |
576 | if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) | 643 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) |
577 | return -EINVAL; | 644 | return -EINVAL; |
578 | 645 | ||
579 | return do_readv_writev(READ, file, vec, vlen, pos); | 646 | return do_readv_writev(READ, file, vec, vlen, pos); |
@@ -586,7 +653,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, | |||
586 | { | 653 | { |
587 | if (!(file->f_mode & FMODE_WRITE)) | 654 | if (!(file->f_mode & FMODE_WRITE)) |
588 | return -EBADF; | 655 | return -EBADF; |
589 | if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) | 656 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) |
590 | return -EINVAL; | 657 | return -EINVAL; |
591 | 658 | ||
592 | return do_readv_writev(WRITE, file, vec, vlen, pos); | 659 | return do_readv_writev(WRITE, file, vec, vlen, pos); |