diff options
author | Miklos Szeredi <mszeredi@suse.cz> | 2009-05-19 05:37:46 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-05-19 05:37:46 -0400 |
commit | b2858d7d1639c04ca3c54988d76c5f7300b76f1c (patch) | |
tree | 87bd9ecead14418c44320e2944a0178af64af9ea | |
parent | 4fc981ef9e7c0953d5c4896ce088b19c50cb018f (diff) |
splice: fix kmaps in default_file_splice_write()
Unfortunately, holding multiple kmap() mappings at once within a single
thread can deadlock, so writing out multiple buffers with writev() isn't possible.
Change the implementation so that it does a separate write() for each
buffer. This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.
This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations. In the future it may be
worth implementing default_file_splice_write() in a more efficient way
on configs that allow it.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/splice.c | 130 |
1 files changed, 22 insertions, 108 deletions
diff --git a/fs/splice.c b/fs/splice.c index 41179c0a655b..73766d24f97b 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | |||
535 | return res; | 535 | return res; |
536 | } | 536 | } |
537 | 537 | ||
538 | static ssize_t kernel_writev(struct file *file, const struct iovec *vec, | 538 | static ssize_t kernel_write(struct file *file, const char *buf, size_t count, |
539 | unsigned long vlen, loff_t *ppos) | 539 | loff_t pos) |
540 | { | 540 | { |
541 | mm_segment_t old_fs; | 541 | mm_segment_t old_fs; |
542 | ssize_t res; | 542 | ssize_t res; |
@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file *file, const struct iovec *vec, | |||
544 | old_fs = get_fs(); | 544 | old_fs = get_fs(); |
545 | set_fs(get_ds()); | 545 | set_fs(get_ds()); |
546 | /* The cast to a user pointer is valid due to the set_fs() */ | 546 | /* The cast to a user pointer is valid due to the set_fs() */ |
547 | res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos); | 547 | res = vfs_write(file, (const char __user *)buf, count, &pos); |
548 | set_fs(old_fs); | 548 | set_fs(old_fs); |
549 | 549 | ||
550 | return res; | 550 | return res; |
@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1003 | 1003 | ||
1004 | EXPORT_SYMBOL(generic_file_splice_write); | 1004 | EXPORT_SYMBOL(generic_file_splice_write); |
1005 | 1005 | ||
1006 | static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n) | 1006 | static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
1007 | struct splice_desc *sd) | ||
1007 | { | 1008 | { |
1008 | return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS]; | 1009 | int ret; |
1010 | void *data; | ||
1011 | |||
1012 | ret = buf->ops->confirm(pipe, buf); | ||
1013 | if (ret) | ||
1014 | return ret; | ||
1015 | |||
1016 | data = buf->ops->map(pipe, buf, 0); | ||
1017 | ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); | ||
1018 | buf->ops->unmap(pipe, buf, data); | ||
1019 | |||
1020 | return ret; | ||
1009 | } | 1021 | } |
1010 | 1022 | ||
1011 | static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, | 1023 | static ssize_t default_file_splice_write(struct pipe_inode_info *pipe, |
1012 | struct file *out, loff_t *ppos, | 1024 | struct file *out, loff_t *ppos, |
1013 | size_t len, unsigned int flags) | 1025 | size_t len, unsigned int flags) |
1014 | { | 1026 | { |
1015 | ssize_t ret = 0; | 1027 | ssize_t ret; |
1016 | ssize_t total_len = 0; | ||
1017 | int do_wakeup = 0; | ||
1018 | |||
1019 | pipe_lock(pipe); | ||
1020 | while (len) { | ||
1021 | struct pipe_buffer *buf; | ||
1022 | void *data[PIPE_BUFFERS]; | ||
1023 | struct iovec vec[PIPE_BUFFERS]; | ||
1024 | unsigned int nr_pages = 0; | ||
1025 | unsigned int write_len = 0; | ||
1026 | unsigned int now_len = len; | ||
1027 | unsigned int this_len; | ||
1028 | int i; | ||
1029 | |||
1030 | BUG_ON(pipe->nrbufs > PIPE_BUFFERS); | ||
1031 | for (i = 0; i < pipe->nrbufs && now_len; i++) { | ||
1032 | buf = nth_pipe_buf(pipe, i); | ||
1033 | |||
1034 | ret = buf->ops->confirm(pipe, buf); | ||
1035 | if (ret) | ||
1036 | break; | ||
1037 | |||
1038 | data[i] = buf->ops->map(pipe, buf, 0); | ||
1039 | this_len = min(buf->len, now_len); | ||
1040 | vec[i].iov_base = (void __user *) data[i] + buf->offset; | ||
1041 | vec[i].iov_len = this_len; | ||
1042 | now_len -= this_len; | ||
1043 | write_len += this_len; | ||
1044 | nr_pages++; | ||
1045 | } | ||
1046 | |||
1047 | if (nr_pages) { | ||
1048 | ret = kernel_writev(out, vec, nr_pages, ppos); | ||
1049 | if (ret == 0) | ||
1050 | ret = -EIO; | ||
1051 | if (ret > 0) { | ||
1052 | len -= ret; | ||
1053 | total_len += ret; | ||
1054 | } | ||
1055 | } | ||
1056 | |||
1057 | for (i = 0; i < nr_pages; i++) { | ||
1058 | buf = nth_pipe_buf(pipe, i); | ||
1059 | buf->ops->unmap(pipe, buf, data[i]); | ||
1060 | |||
1061 | if (ret > 0) { | ||
1062 | this_len = min_t(unsigned, vec[i].iov_len, ret); | ||
1063 | buf->offset += this_len; | ||
1064 | buf->len -= this_len; | ||
1065 | ret -= this_len; | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | if (ret < 0) | ||
1070 | break; | ||
1071 | |||
1072 | while (pipe->nrbufs) { | ||
1073 | const struct pipe_buf_operations *ops; | ||
1074 | |||
1075 | buf = nth_pipe_buf(pipe, 0); | ||
1076 | if (buf->len) | ||
1077 | break; | ||
1078 | |||
1079 | ops = buf->ops; | ||
1080 | buf->ops = NULL; | ||
1081 | ops->release(pipe, buf); | ||
1082 | pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS; | ||
1083 | pipe->nrbufs--; | ||
1084 | if (pipe->inode) | ||
1085 | do_wakeup = 1; | ||
1086 | } | ||
1087 | |||
1088 | if (pipe->nrbufs) | ||
1089 | continue; | ||
1090 | if (!pipe->writers) | ||
1091 | break; | ||
1092 | if (!pipe->waiting_writers) { | ||
1093 | if (total_len) | ||
1094 | break; | ||
1095 | } | ||
1096 | |||
1097 | if (flags & SPLICE_F_NONBLOCK) { | ||
1098 | ret = -EAGAIN; | ||
1099 | break; | ||
1100 | } | ||
1101 | |||
1102 | if (signal_pending(current)) { | ||
1103 | ret = -ERESTARTSYS; | ||
1104 | break; | ||
1105 | } | ||
1106 | |||
1107 | if (do_wakeup) { | ||
1108 | wakeup_pipe_writers(pipe); | ||
1109 | do_wakeup = 0; | ||
1110 | } | ||
1111 | |||
1112 | pipe_wait(pipe); | ||
1113 | } | ||
1114 | pipe_unlock(pipe); | ||
1115 | 1028 | ||
1116 | if (do_wakeup) | 1029 | ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf); |
1117 | wakeup_pipe_writers(pipe); | 1030 | if (ret > 0) |
1031 | *ppos += ret; | ||
1118 | 1032 | ||
1119 | return total_len ? total_len : ret; | 1033 | return ret; |
1120 | } | 1034 | } |
1121 | 1035 | ||
1122 | /** | 1036 | /** |