diff options
author | Jens Axboe <axboe@suse.de> | 2006-05-01 14:02:05 -0400 |
---|---|---|
committer | Jens Axboe <axboe@suse.de> | 2006-05-01 14:02:05 -0400 |
commit | f6762b7ad8edd6abc802542ce845d3bc8adcb92f (patch) | |
tree | c941aaf7295f8efecaf42aa86112649b9b6f23a9 /fs/pipe.c | |
parent | e27dedd84c119e2f7af54fcde3293be5ad812103 (diff) |
[PATCH] pipe: enable atomic copying of pipe data to/from user space
The pipe ->map() method uses kmap() to virtually map the pages, which
is both slow and has known scalability issues on SMP. This patch enables
atomic copying of pipe pages, by pre-faulting data and using kmap_atomic()
instead.
lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here
are results from that on a UP machine with highmem (1.5GiB of RAM), running
first a UP kernel, SMP kernel, and SMP kernel patched.
Vanilla-UP:
Pipe bandwidth: 1622.28 MB/sec
Pipe bandwidth: 1610.59 MB/sec
Pipe bandwidth: 1608.30 MB/sec
Pipe latency: 7.3275 microseconds
Pipe latency: 7.2995 microseconds
Pipe latency: 7.3097 microseconds
Vanilla-SMP:
Pipe bandwidth: 1382.19 MB/sec
Pipe bandwidth: 1317.27 MB/sec
Pipe bandwidth: 1355.61 MB/sec
Pipe latency: 9.6402 microseconds
Pipe latency: 9.6696 microseconds
Pipe latency: 9.6153 microseconds
Patched-SMP:
Pipe bandwidth: 1578.70 MB/sec
Pipe bandwidth: 1579.95 MB/sec
Pipe bandwidth: 1578.63 MB/sec
Pipe latency: 9.1654 microseconds
Pipe latency: 9.2266 microseconds
Pipe latency: 9.1527 microseconds
Signed-off-by: Jens Axboe <axboe@suse.de>
Diffstat (limited to 'fs/pipe.c')
-rw-r--r-- | fs/pipe.c | 141 |
1 files changed, 118 insertions, 23 deletions
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe) | |||
55 | } | 55 | } |
56 | 56 | ||
57 | static int | 57 | static int |
58 | pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) | 58 | pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, |
59 | int atomic) | ||
59 | { | 60 | { |
60 | unsigned long copy; | 61 | unsigned long copy; |
61 | 62 | ||
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) | |||
64 | iov++; | 65 | iov++; |
65 | copy = min_t(unsigned long, len, iov->iov_len); | 66 | copy = min_t(unsigned long, len, iov->iov_len); |
66 | 67 | ||
67 | if (copy_from_user(to, iov->iov_base, copy)) | 68 | if (atomic) { |
68 | return -EFAULT; | 69 | if (__copy_from_user_inatomic(to, iov->iov_base, copy)) |
70 | return -EFAULT; | ||
71 | } else { | ||
72 | if (copy_from_user(to, iov->iov_base, copy)) | ||
73 | return -EFAULT; | ||
74 | } | ||
69 | to += copy; | 75 | to += copy; |
70 | len -= copy; | 76 | len -= copy; |
71 | iov->iov_base += copy; | 77 | iov->iov_base += copy; |
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) | |||
75 | } | 81 | } |
76 | 82 | ||
77 | static int | 83 | static int |
78 | pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) | 84 | pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, |
85 | int atomic) | ||
79 | { | 86 | { |
80 | unsigned long copy; | 87 | unsigned long copy; |
81 | 88 | ||
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) | |||
84 | iov++; | 91 | iov++; |
85 | copy = min_t(unsigned long, len, iov->iov_len); | 92 | copy = min_t(unsigned long, len, iov->iov_len); |
86 | 93 | ||
87 | if (copy_to_user(iov->iov_base, from, copy)) | 94 | if (atomic) { |
88 | return -EFAULT; | 95 | if (__copy_to_user_inatomic(iov->iov_base, from, copy)) |
96 | return -EFAULT; | ||
97 | } else { | ||
98 | if (copy_to_user(iov->iov_base, from, copy)) | ||
99 | return -EFAULT; | ||
100 | } | ||
89 | from += copy; | 101 | from += copy; |
90 | len -= copy; | 102 | len -= copy; |
91 | iov->iov_base += copy; | 103 | iov->iov_base += copy; |
@@ -94,6 +106,47 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) | |||
94 | return 0; | 106 | return 0; |
95 | } | 107 | } |
96 | 108 | ||
109 | /* | ||
110 | * Attempt to pre-fault in the user memory, so we can use atomic copies. | ||
111 | * Returns the number of bytes not faulted in. | ||
112 | */ | ||
113 | static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) | ||
114 | { | ||
115 | while (!iov->iov_len) | ||
116 | iov++; | ||
117 | |||
118 | while (len > 0) { | ||
119 | unsigned long this_len; | ||
120 | |||
121 | this_len = min_t(unsigned long, len, iov->iov_len); | ||
122 | if (fault_in_pages_writeable(iov->iov_base, this_len)) | ||
123 | break; | ||
124 | |||
125 | len -= this_len; | ||
126 | iov++; | ||
127 | } | ||
128 | |||
129 | return len; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Pre-fault in the user memory, so we can use atomic copies. | ||
134 | */ | ||
135 | static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) | ||
136 | { | ||
137 | while (!iov->iov_len) | ||
138 | iov++; | ||
139 | |||
140 | while (len > 0) { | ||
141 | unsigned long this_len; | ||
142 | |||
143 | this_len = min_t(unsigned long, len, iov->iov_len); | ||
144 | fault_in_pages_readable(iov->iov_base, this_len); | ||
145 | len -= this_len; | ||
146 | iov++; | ||
147 | } | ||
148 | } | ||
149 | |||
97 | static void anon_pipe_buf_release(struct pipe_inode_info *pipe, | 150 | static void anon_pipe_buf_release(struct pipe_inode_info *pipe, |
98 | struct pipe_buffer *buf) | 151 | struct pipe_buffer *buf) |
99 | { | 152 | { |
@@ -111,15 +164,24 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, | |||
111 | } | 164 | } |
112 | 165 | ||
113 | void *generic_pipe_buf_map(struct pipe_inode_info *pipe, | 166 | void *generic_pipe_buf_map(struct pipe_inode_info *pipe, |
114 | struct pipe_buffer *buf) | 167 | struct pipe_buffer *buf, int atomic) |
115 | { | 168 | { |
169 | if (atomic) { | ||
170 | buf->flags |= PIPE_BUF_FLAG_ATOMIC; | ||
171 | return kmap_atomic(buf->page, KM_USER0); | ||
172 | } | ||
173 | |||
116 | return kmap(buf->page); | 174 | return kmap(buf->page); |
117 | } | 175 | } |
118 | 176 | ||
119 | void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, | 177 | void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, |
120 | struct pipe_buffer *buf) | 178 | struct pipe_buffer *buf, void *map_data) |
121 | { | 179 | { |
122 | kunmap(buf->page); | 180 | if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { |
181 | buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; | ||
182 | kunmap_atomic(map_data, KM_USER0); | ||
183 | } else | ||
184 | kunmap(buf->page); | ||
123 | } | 185 | } |
124 | 186 | ||
125 | static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, | 187 | static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, |
@@ -183,7 +245,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, | |||
183 | struct pipe_buf_operations *ops = buf->ops; | 245 | struct pipe_buf_operations *ops = buf->ops; |
184 | void *addr; | 246 | void *addr; |
185 | size_t chars = buf->len; | 247 | size_t chars = buf->len; |
186 | int error; | 248 | int error, atomic; |
187 | 249 | ||
188 | if (chars > total_len) | 250 | if (chars > total_len) |
189 | chars = total_len; | 251 | chars = total_len; |
@@ -195,12 +257,21 @@ pipe_readv(struct file *filp, const struct iovec *_iov, | |||
195 | break; | 257 | break; |
196 | } | 258 | } |
197 | 259 | ||
198 | addr = ops->map(pipe, buf); | 260 | atomic = !iov_fault_in_pages_write(iov, chars); |
199 | error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); | 261 | redo: |
200 | ops->unmap(pipe, buf); | 262 | addr = ops->map(pipe, buf, atomic); |
263 | error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); | ||
264 | ops->unmap(pipe, buf, addr); | ||
201 | if (unlikely(error)) { | 265 | if (unlikely(error)) { |
266 | /* | ||
267 | * Just retry with the slow path if we failed. | ||
268 | */ | ||
269 | if (atomic) { | ||
270 | atomic = 0; | ||
271 | goto redo; | ||
272 | } | ||
202 | if (!ret) | 273 | if (!ret) |
203 | ret = -EFAULT; | 274 | ret = error; |
204 | break; | 275 | break; |
205 | } | 276 | } |
206 | ret += chars; | 277 | ret += chars; |
@@ -304,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov, | |||
304 | int offset = buf->offset + buf->len; | 375 | int offset = buf->offset + buf->len; |
305 | 376 | ||
306 | if (ops->can_merge && offset + chars <= PAGE_SIZE) { | 377 | if (ops->can_merge && offset + chars <= PAGE_SIZE) { |
378 | int error, atomic = 1; | ||
307 | void *addr; | 379 | void *addr; |
308 | int error; | ||
309 | 380 | ||
310 | error = ops->pin(pipe, buf); | 381 | error = ops->pin(pipe, buf); |
311 | if (error) | 382 | if (error) |
312 | goto out; | 383 | goto out; |
313 | 384 | ||
314 | addr = ops->map(pipe, buf); | 385 | iov_fault_in_pages_read(iov, chars); |
386 | redo1: | ||
387 | addr = ops->map(pipe, buf, atomic); | ||
315 | error = pipe_iov_copy_from_user(offset + addr, iov, | 388 | error = pipe_iov_copy_from_user(offset + addr, iov, |
316 | chars); | 389 | chars, atomic); |
317 | ops->unmap(pipe, buf); | 390 | ops->unmap(pipe, buf, addr); |
318 | ret = error; | 391 | ret = error; |
319 | do_wakeup = 1; | 392 | do_wakeup = 1; |
320 | if (error) | 393 | if (error) { |
394 | if (atomic) { | ||
395 | atomic = 0; | ||
396 | goto redo1; | ||
397 | } | ||
321 | goto out; | 398 | goto out; |
399 | } | ||
322 | buf->len += chars; | 400 | buf->len += chars; |
323 | total_len -= chars; | 401 | total_len -= chars; |
324 | ret = chars; | 402 | ret = chars; |
@@ -341,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov, | |||
341 | int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); | 419 | int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); |
342 | struct pipe_buffer *buf = pipe->bufs + newbuf; | 420 | struct pipe_buffer *buf = pipe->bufs + newbuf; |
343 | struct page *page = pipe->tmp_page; | 421 | struct page *page = pipe->tmp_page; |
344 | int error; | 422 | char *src; |
423 | int error, atomic = 1; | ||
345 | 424 | ||
346 | if (!page) { | 425 | if (!page) { |
347 | page = alloc_page(GFP_HIGHUSER); | 426 | page = alloc_page(GFP_HIGHUSER); |
@@ -361,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov, | |||
361 | if (chars > total_len) | 440 | if (chars > total_len) |
362 | chars = total_len; | 441 | chars = total_len; |
363 | 442 | ||
364 | error = pipe_iov_copy_from_user(kmap(page), iov, chars); | 443 | iov_fault_in_pages_read(iov, chars); |
365 | kunmap(page); | 444 | redo2: |
445 | if (atomic) | ||
446 | src = kmap_atomic(page, KM_USER0); | ||
447 | else | ||
448 | src = kmap(page); | ||
449 | |||
450 | error = pipe_iov_copy_from_user(src, iov, chars, | ||
451 | atomic); | ||
452 | if (atomic) | ||
453 | kunmap_atomic(src, KM_USER0); | ||
454 | else | ||
455 | kunmap(page); | ||
456 | |||
366 | if (unlikely(error)) { | 457 | if (unlikely(error)) { |
458 | if (atomic) { | ||
459 | atomic = 0; | ||
460 | goto redo2; | ||
461 | } | ||
367 | if (!ret) | 462 | if (!ret) |
368 | ret = -EFAULT; | 463 | ret = error; |
369 | break; | 464 | break; |
370 | } | 465 | } |
371 | ret += chars; | 466 | ret += chars; |