author    Jason Wang <jasowang@redhat.com>       2017-02-28 04:56:02 -0500
committer Michael S. Tsirkin <mst@redhat.com>    2017-03-01 18:35:06 -0500
commit    f889491380582b4ba2981cf0b0d7d6a40fb30ab7
tree      2c09c351ea07a45719a217a37fcc2fcec8591dff
parent    0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0
vhost: introduce O(1) vq metadata cache
When the device IOTLB is enabled, all address translations are stored in an interval tree. The O(lg N) lookup can be slow for virtqueue metadata (the avail, used and descriptor rings), since that metadata is accessed much more often than other addresses. This patch therefore introduces an O(1) array that points at the interval tree nodes holding the translations of the vq metadata. The array is updated during vq IOTLB prefetching and is reset on each invalidation and TLB update. Each time we want to access vq metadata, this small array is queried before the interval tree. This is sufficient for static mappings but not for dynamic mappings; further optimizations can be done on top.

Tests were done with l2fwd in the guest (2M hugepages):

         noiommu    | before          | after
    tx   1.32Mpps   | 1.06Mpps (82%)  | 1.30Mpps (98%)
    rx   2.33Mpps   | 1.46Mpps (63%)  | 2.29Mpps (98%)

We can almost reach the same performance as noiommu mode.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
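In essence, the translation path takes the following shape. This is a minimal userspace sketch of the caching idea, not the kernel code: map_node, lookup_tree() and meta_cache are hypothetical stand-ins for vhost_umem_node, the interval-tree search and vq->meta_iotlb[], and for brevity the sketch repopulates the cache on the slow path, whereas the patch only repopulates it during vq IOTLB prefetch (see vhost_vq_meta_update() in the diff below).

#include <stddef.h>
#include <stdint.h>

enum addr_type { ADDR_DESC, ADDR_AVAIL, ADDR_USED, NUM_ADDRS };

struct map_node {
	uint64_t start, size;	/* IOVA range covered by this mapping */
	uint64_t uaddr;		/* userspace address it translates to */
};

/* The O(1) cache: one slot per metadata type, pointing into the tree. */
static const struct map_node *meta_cache[NUM_ADDRS];

/* Stand-in for the O(lg N) interval-tree search. */
extern const struct map_node *lookup_tree(uint64_t addr, uint64_t len);

static void *translate(uint64_t addr, uint64_t len, enum addr_type type)
{
	const struct map_node *node = meta_cache[type];

	/* Fast path: the cached node still covers the whole access. */
	if (node && addr >= node->start &&
	    addr - node->start + len <= node->size)
		return (void *)(uintptr_t)(node->uaddr + addr - node->start);

	/* Slow path: search the tree, then cache the node for next time. */
	node = lookup_tree(addr, len);
	if (!node)
		return NULL;
	meta_cache[type] = node;
	return (void *)(uintptr_t)(node->uaddr + addr - node->start);
}

/* Every IOTLB update or invalidation must drop the cached pointers,
 * as vhost_vq_meta_reset() does in the patch. */
static void meta_cache_reset(void)
{
	int i;

	for (i = 0; i < NUM_ADDRS; i++)
		meta_cache[i] = NULL;
}

Keeping one slot per metadata type is what makes invalidation cheap: a reset only has to clear VHOST_NUM_ADDRS pointers per virtqueue.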
-rw-r--r--  drivers/vhost/vhost.c | 136
-rw-r--r--  drivers/vhost/vhost.h |   8
2 files changed, 118 insertions(+), 26 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 1f7e4e4e6f8e..998bed505530 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
+static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
+{
+	int j;
+
+	for (j = 0; j < VHOST_NUM_ADDRS; j++)
+		vq->meta_iotlb[j] = NULL;
+}
+
+static void vhost_vq_meta_reset(struct vhost_dev *d)
+{
+	int i;
+
+	for (i = 0; i < d->nvqs; ++i)
+		__vhost_vq_meta_reset(d->vqs[i]);
+}
+
 static void vhost_vq_reset(struct vhost_dev *dev,
 			   struct vhost_virtqueue *vq)
 {
@@ -312,6 +328,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->busyloop_timeout = 0;
 	vq->umem = NULL;
 	vq->iotlb = NULL;
+	__vhost_vq_meta_reset(vq);
 }
 
 static int vhost_worker(void *data)
@@ -691,6 +708,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
 	return 1;
 }
 
+static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
+					       u64 addr, unsigned int size,
+					       int type)
+{
+	const struct vhost_umem_node *node = vq->meta_iotlb[type];
+
+	if (!node)
+		return NULL;
+
+	return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
+}
+
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
 static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -733,8 +762,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
 	 * could be access through iotlb. So -EAGAIN should
 	 * not happen in this case.
 	 */
-	/* TODO: more fast path */
 	struct iov_iter t;
+	void __user *uaddr = vhost_vq_meta_fetch(vq,
+			     (u64)(uintptr_t)to, size,
+			     VHOST_ADDR_DESC);
+
+	if (uaddr)
+		return __copy_to_user(uaddr, from, size);
+
 	ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
 			     ARRAY_SIZE(vq->iotlb_iov),
 			     VHOST_ACCESS_WO);
@@ -762,8 +797,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 	 * could be access through iotlb. So -EAGAIN should
 	 * not happen in this case.
 	 */
-	/* TODO: more fast path */
+	void __user *uaddr = vhost_vq_meta_fetch(vq,
+			     (u64)(uintptr_t)from, size,
+			     VHOST_ADDR_DESC);
 	struct iov_iter f;
+
+	if (uaddr)
+		return __copy_from_user(to, uaddr, size);
+
 	ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
 			     ARRAY_SIZE(vq->iotlb_iov),
 			     VHOST_ACCESS_RO);
@@ -783,17 +824,12 @@ out:
 	return ret;
 }
 
-static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
-				     void __user *addr, unsigned size)
+static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
+					  void __user *addr, unsigned int size,
+					  int type)
 {
 	int ret;
 
-	/* This function should be called after iotlb
-	 * prefetch, which means we're sure that vq
-	 * could be access through iotlb. So -EAGAIN should
-	 * not happen in this case.
-	 */
-	/* TODO: more fast path */
 	ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
 			     ARRAY_SIZE(vq->iotlb_iov),
 			     VHOST_ACCESS_RO);
@@ -814,14 +850,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	return vq->iotlb_iov[0].iov_base;
 }
 
-#define vhost_put_user(vq, x, ptr) \
+/* This function should be called after iotlb
+ * prefetch, which means we're sure that vq
+ * could be access through iotlb. So -EAGAIN should
+ * not happen in this case.
+ */
+static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
+					    void *addr, unsigned int size,
+					    int type)
+{
+	void __user *uaddr = vhost_vq_meta_fetch(vq,
+			     (u64)(uintptr_t)addr, size, type);
+	if (uaddr)
+		return uaddr;
+
+	return __vhost_get_user_slow(vq, addr, size, type);
+}
+
+#define vhost_put_user(vq, x, ptr) \
 ({ \
 	int ret = -EFAULT; \
 	if (!vq->iotlb) { \
 		ret = __put_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) to = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+					  sizeof(*ptr), VHOST_ADDR_USED); \
 		if (to != NULL) \
 			ret = __put_user(x, to); \
 		else \
@@ -830,14 +884,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
-#define vhost_get_user(vq, x, ptr) \
+#define vhost_get_user(vq, x, ptr, type) \
 ({ \
 	int ret; \
 	if (!vq->iotlb) { \
 		ret = __get_user(x, ptr); \
 	} else { \
 		__typeof__(ptr) from = \
-			(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
+			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
+					  sizeof(*ptr), \
+					  type); \
 		if (from != NULL) \
 			ret = __get_user(x, from); \
 		else \
@@ -846,6 +902,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
+#define vhost_get_avail(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used(vq, x, ptr) \
+	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
+
 static void vhost_dev_lock_vqs(struct vhost_dev *d)
 {
 	int i = 0;
@@ -951,6 +1013,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 			ret = -EFAULT;
 			break;
 		}
+		vhost_vq_meta_reset(dev);
 		if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
 					 msg->iova + msg->size - 1,
 					 msg->uaddr, msg->perm)) {
@@ -960,6 +1023,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 		vhost_iotlb_notify_vq(dev, msg);
 		break;
 	case VHOST_IOTLB_INVALIDATE:
+		vhost_vq_meta_reset(dev);
 		vhost_del_umem_range(dev->iotlb, msg->iova,
 				     msg->iova + msg->size - 1);
 		break;
@@ -1103,12 +1167,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
 			sizeof *used + num * sizeof *used->ring + s);
 }
 
+static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
+				 const struct vhost_umem_node *node,
+				 int type)
+{
+	int access = (type == VHOST_ADDR_USED) ?
+		     VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+
+	if (likely(node->perm & access))
+		vq->meta_iotlb[type] = node;
+}
+
 static int iotlb_access_ok(struct vhost_virtqueue *vq,
-			   int access, u64 addr, u64 len)
+			   int access, u64 addr, u64 len, int type)
 {
 	const struct vhost_umem_node *node;
 	struct vhost_umem *umem = vq->iotlb;
-	u64 s = 0, size;
+	u64 s = 0, size, orig_addr = addr;
+
+	if (vhost_vq_meta_fetch(vq, addr, len, type))
+		return true;
 
 	while (len > s) {
 		node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1125,6 +1203,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
 		}
 
 		size = node->size - addr + node->start;
+
+		if (orig_addr == addr && size >= len)
+			vhost_vq_meta_update(vq, node, type);
+
 		s += size;
 		addr += size;
 	}
@@ -1141,13 +1223,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
 		return 1;
 
 	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
-			       num * sizeof *vq->desc) &&
+			       num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
 			       sizeof *vq->avail +
-			       num * sizeof *vq->avail->ring + s) &&
+			       num * sizeof(*vq->avail->ring) + s,
+			       VHOST_ADDR_AVAIL) &&
 	       iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
 			       sizeof *vq->used +
-			       num * sizeof *vq->used->ring + s);
+			       num * sizeof(*vq->used->ring) + s,
+			       VHOST_ADDR_USED);
 }
 EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
@@ -1728,7 +1812,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
 		r = -EFAULT;
 		goto err;
 	}
-	r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
+	r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
 	if (r) {
 		vq_err(vq, "Can't access used idx at %p\n",
 		       &vq->used->idx);
@@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 	last_avail_idx = vq->last_avail_idx;
 
 	if (vq->avail_idx == vq->last_avail_idx) {
-		if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
+		if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
 			vq_err(vq, "Failed to access avail idx at %p\n",
 			       &vq->avail->idx);
 			return -EFAULT;
@@ -1959,7 +2043,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
-	if (unlikely(vhost_get_user(vq, ring_head,
+	if (unlikely(vhost_get_avail(vq, ring_head,
 		     &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
@@ -2175,7 +2259,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	 * with the barrier that the Guest executes when enabling
 	 * interrupts. */
 	smp_mb();
-	if (vhost_get_user(vq, flags, &vq->avail->flags)) {
+	if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
 		vq_err(vq, "Failed to get flags");
 		return true;
 	}
@@ -2202,7 +2286,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	 * interrupts. */
 	smp_mb();
 
-	if (vhost_get_user(vq, event, vhost_used_event(vq))) {
+	if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
 		vq_err(vq, "Failed to get used event idx");
 		return true;
 	}
@@ -2246,7 +2330,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	__virtio16 avail_idx;
 	int r;
 
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r)
 		return false;
 
@@ -2281,7 +2365,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 	/* They could have slipped one in as we were doing that: make
 	 * sure it's written, then check again. */
 	smp_mb();
-	r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
+	r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
 	if (r) {
 		vq_err(vq, "Failed to check avail idx at %p: %d\n",
 		       &vq->avail->idx, r);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index a9cbbb148f46..f55671d53f28 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -76,6 +76,13 @@ struct vhost_umem {
 	int numem;
 };
 
+enum vhost_uaddr_type {
+	VHOST_ADDR_DESC = 0,
+	VHOST_ADDR_AVAIL = 1,
+	VHOST_ADDR_USED = 2,
+	VHOST_NUM_ADDRS = 3,
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct file *call;
 	struct file *error;