aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChristophe Leroy <christophe.leroy@c-s.fr>2015-05-06 11:26:47 -0400
committerJens Axboe <axboe@fb.com>2015-05-06 11:27:41 -0400
commit0ff28d9f4674d781e492bcff6f32f0fe48cf0fed (patch)
treed48fa5e6e95ffc8dc726eb9d72dcaef886463ae3 /fs
parent9ba52e5812e53f20f23600d79449a3ec05a0254f (diff)
splice: sendfile() at once fails for big files
Using sendfile with below small program to get MD5 sums of some files, it appear that big files (over 64kbytes with 4k pages system) get a wrong MD5 sum while small files get the correct sum. This program uses sendfile() to send a file to an AF_ALG socket for hashing. /* md5sum2.c */ #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <fcntl.h> #include <sys/socket.h> #include <sys/stat.h> #include <sys/types.h> #include <linux/if_alg.h> int main(int argc, char **argv) { int sk = socket(AF_ALG, SOCK_SEQPACKET, 0); struct stat st; struct sockaddr_alg sa = { .salg_family = AF_ALG, .salg_type = "hash", .salg_name = "md5", }; int n; bind(sk, (struct sockaddr*)&sa, sizeof(sa)); for (n = 1; n < argc; n++) { int size; int offset = 0; char buf[4096]; int fd; int sko; int i; fd = open(argv[n], O_RDONLY); sko = accept(sk, NULL, 0); fstat(fd, &st); size = st.st_size; sendfile(sko, fd, &offset, size); size = read(sko, buf, sizeof(buf)); for (i = 0; i < size; i++) printf("%2.2x", buf[i]); printf(" %s\n", argv[n]); close(fd); close(sko); } exit(0); } Test below is done using official linux patch files. First result is with a software based md5sum. Second result is with the program above. root@vgoip:~# ls -l patch-3.6.* -rw-r--r-- 1 root root 64011 Aug 24 12:01 patch-3.6.2.gz -rw-r--r-- 1 root root 94131 Aug 24 12:01 patch-3.6.3.gz root@vgoip:~# md5sum patch-3.6.* b3ffb9848196846f31b2ff133d2d6443 patch-3.6.2.gz c5e8f687878457db77cb7158c38a7e43 patch-3.6.3.gz root@vgoip:~# ./md5sum2 patch-3.6.* b3ffb9848196846f31b2ff133d2d6443 patch-3.6.2.gz 5fd77b24e68bb24dcc72d6e57c64790e patch-3.6.3.gz After investivation, it appears that sendfile() sends the files by blocks of 64kbytes (16 times PAGE_SIZE). The problem is that at the end of each block, the SPLICE_F_MORE flag is missing, therefore the hashing operation is reset as if it was the end of the file. This patch adds SPLICE_F_MORE to the flags when more data is pending. With the patch applied, we get the correct sums: root@vgoip:~# md5sum patch-3.6.* b3ffb9848196846f31b2ff133d2d6443 patch-3.6.2.gz c5e8f687878457db77cb7158c38a7e43 patch-3.6.3.gz root@vgoip:~# ./md5sum2 patch-3.6.* b3ffb9848196846f31b2ff133d2d6443 patch-3.6.2.gz c5e8f687878457db77cb7158c38a7e43 patch-3.6.3.gz Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/splice.c12
1 files changed, 11 insertions, 1 deletions
diff --git a/fs/splice.c b/fs/splice.c
index 476024bb6546..bfe62ae40f40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1161 long ret, bytes; 1161 long ret, bytes;
1162 umode_t i_mode; 1162 umode_t i_mode;
1163 size_t len; 1163 size_t len;
1164 int i, flags; 1164 int i, flags, more;
1165 1165
1166 /* 1166 /*
1167 * We require the input being a regular file, as we don't want to 1167 * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1204 * Don't block on output, we have to drain the direct pipe. 1204 * Don't block on output, we have to drain the direct pipe.
1205 */ 1205 */
1206 sd->flags &= ~SPLICE_F_NONBLOCK; 1206 sd->flags &= ~SPLICE_F_NONBLOCK;
1207 more = sd->flags & SPLICE_F_MORE;
1207 1208
1208 while (len) { 1209 while (len) {
1209 size_t read_len; 1210 size_t read_len;
@@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1217 sd->total_len = read_len; 1218 sd->total_len = read_len;
1218 1219
1219 /* 1220 /*
1221 * If more data is pending, set SPLICE_F_MORE
1222 * If this is the last data and SPLICE_F_MORE was not set
1223 * initially, clears it.
1224 */
1225 if (read_len < len)
1226 sd->flags |= SPLICE_F_MORE;
1227 else if (!more)
1228 sd->flags &= ~SPLICE_F_MORE;
1229 /*
1220 * NOTE: nonblocking mode only applies to the input. We 1230 * NOTE: nonblocking mode only applies to the input. We
1221 * must not do the output in nonblocking mode as then we 1231 * must not do the output in nonblocking mode as then we
1222 * could get stuck data in the internal pipe: 1232 * could get stuck data in the internal pipe: