diff options
author | Zhu, Caifeng <zhucaifeng@unissoft-nj.com> | 2015-10-08 03:26:15 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-11-02 17:36:47 -0500 |
commit | b5b98989dc7ed2093aeb76f2d0db79888582b0a2 (patch) | |
tree | 5684938d5d0441c609a39d6211d82639d0b90918 /fs/ceph | |
parent | 777d738a5e58ba3b6f3932ab1543ce93703f4873 (diff) |
ceph: combine as many iovec as possible into one OSD request
Both ceph_sync_direct_write and ceph_sync_read iterate iovec elements
one by one, sending one OSD request for each iovec. This is sub-optimal:
we can combine several iovecs into one page vector, and send an OSD
request for the whole page vector.
Signed-off-by: Zhu, Caifeng <zhucaifeng@unissoft-nj.com>
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/file.c | 87 |
1 files changed, 77 insertions, 10 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0c62868b5c56..3c68e6aee2f0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -34,6 +34,74 @@ | |||
34 | * need to wait for MDS acknowledgement. | 34 | * need to wait for MDS acknowledgement. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | /* | ||
38 | * Calculate the length sum of direct io vectors that can | ||
39 | * be combined into one page vector. | ||
40 | */ | ||
41 | static size_t dio_get_pagev_size(const struct iov_iter *it) | ||
42 | { | ||
43 | const struct iovec *iov = it->iov; | ||
44 | const struct iovec *iovend = iov + it->nr_segs; | ||
45 | size_t size; | ||
46 | |||
47 | size = iov->iov_len - it->iov_offset; | ||
48 | /* | ||
49 | * An iov can be page vectored when both the current tail | ||
50 | * and the next base are page aligned. | ||
51 | */ | ||
52 | while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && | ||
53 | (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { | ||
54 | size += iov->iov_len; | ||
55 | } | ||
56 | dout("dio_get_pagevlen len = %zu\n", size); | ||
57 | return size; | ||
58 | } | ||
59 | |||
/*
 * Allocate a page vector based on (@it, @nbytes).
 * The return value is the tuple describing a page vector,
 * that is (@pages, @page_align, @num_pages).
 *
 * @it is copied and only the local copy is advanced, so the caller's
 * iterator position is unchanged. On success the pinned pages are
 * returned; on failure an ERR_PTR is returned and any pages pinned so
 * far are released.
 *
 * NOTE(review): the page-pointer array comes from kmalloc with a
 * vmalloc fallback; whoever frees it (ceph_put_page_vector on the
 * error path here, and the caller on success) must free with a routine
 * that handles both allocators (kvfree-style) — confirm against the
 * libceph helpers, which are not visible in this file.
 */
static struct page **
dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
		    size_t *page_align, int *num_pages)
{
	struct iov_iter tmp_it = *it;
	size_t align;
	struct page **pages;
	int ret = 0, idx, npages;

	/* Offset of the first byte within its page. */
	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
		(PAGE_SIZE - 1);
	npages = calc_pages_for(align, nbytes);
	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
	if (!pages) {
		/* Large vectors may not fit a contiguous kmalloc. */
		pages = vmalloc(sizeof(*pages) * npages);
		if (!pages)
			return ERR_PTR(-ENOMEM);
	}

	/*
	 * iov_iter_get_pages() may pin fewer pages than requested per
	 * call, so loop until the whole range is covered.
	 */
	for (idx = 0; idx < npages; ) {
		size_t start;
		ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes,
					 npages - idx, &start);
		if (ret < 0)
			goto fail;

		iov_iter_advance(&tmp_it, ret);
		nbytes -= ret;
		/* ret bytes starting at offset start => pages consumed. */
		idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE;
	}

	/* npages from calc_pages_for() must exactly cover nbytes. */
	BUG_ON(nbytes != 0);
	*num_pages = npages;
	*page_align = align;
	dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align);
	return pages;
fail:
	/* Release the idx pages pinned so far and the array itself. */
	ceph_put_page_vector(pages, idx, false);
	return ERR_PTR(ret);
}
37 | 105 | ||
38 | /* | 106 | /* |
39 | * Prepare an open request. Preallocate ceph_cap to avoid an | 107 | * Prepare an open request. Preallocate ceph_cap to avoid an |
@@ -458,11 +526,10 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | |||
458 | size_t start; | 526 | size_t start; |
459 | ssize_t n; | 527 | ssize_t n; |
460 | 528 | ||
461 | n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); | 529 | n = dio_get_pagev_size(i); |
462 | if (n < 0) | 530 | pages = dio_get_pages_alloc(i, n, &start, &num_pages); |
463 | return n; | 531 | if (IS_ERR(pages)) |
464 | 532 | return PTR_ERR(pages); | |
465 | num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; | ||
466 | 533 | ||
467 | ret = striped_read(inode, off, n, | 534 | ret = striped_read(inode, off, n, |
468 | pages, num_pages, checkeof, | 535 | pages, num_pages, checkeof, |
@@ -592,7 +659,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, | |||
592 | CEPH_OSD_FLAG_WRITE; | 659 | CEPH_OSD_FLAG_WRITE; |
593 | 660 | ||
594 | while (iov_iter_count(from) > 0) { | 661 | while (iov_iter_count(from) > 0) { |
595 | u64 len = iov_iter_single_seg_count(from); | 662 | u64 len = dio_get_pagev_size(from); |
596 | size_t start; | 663 | size_t start; |
597 | ssize_t n; | 664 | ssize_t n; |
598 | 665 | ||
@@ -611,14 +678,14 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, | |||
611 | 678 | ||
612 | osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); | 679 | osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0); |
613 | 680 | ||
614 | n = iov_iter_get_pages_alloc(from, &pages, len, &start); | 681 | n = len; |
615 | if (unlikely(n < 0)) { | 682 | pages = dio_get_pages_alloc(from, len, &start, &num_pages); |
616 | ret = n; | 683 | if (IS_ERR(pages)) { |
617 | ceph_osdc_put_request(req); | 684 | ceph_osdc_put_request(req); |
685 | ret = PTR_ERR(pages); | ||
618 | break; | 686 | break; |
619 | } | 687 | } |
620 | 688 | ||
621 | num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; | ||
622 | /* | 689 | /* |
623 | * throw out any page cache pages in this range. this | 690 | * throw out any page cache pages in this range. this |
624 | * may block. | 691 | * may block. |