summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2019-01-16 03:54:42 -0500
committerDavid S. Miller <davem@davemloft.net>2019-01-18 00:43:24 -0500
commitcc5e710759470bc7f3c61d11fd54586f15fdbdf4 (patch)
treec636103033e16eb5d9218b7c7cdbd98a89c4077c
parentf655f8b818684716b2ebe35760c9b96184587d56 (diff)
vhost: log dirty page correctly
Vhost dirty page logging API is designed to sync through GPA. But we try to log GIOVA when device IOTLB is enabled. This is wrong and may lead to missing data after migration. To solve this issue, when logging with device IOTLB enabled, we will: 1) reuse the device IOTLB translation result of GIOVA->HVA mapping to get HVA, for writable descriptor, get HVA through iovec. For used ring update, translate its GIOVA to HVA 2) traverse the GPA->HVA mapping to get the possible GPA and log through GPA. Note that this reverse mapping is not guaranteed to be unique, so we should log each possible GPA in this case. This fixes the failure of scp to guest during migration. In -next, we will probably support passing GIOVA->GPA instead of GIOVA->HVA. Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") Reported-by: Jintack Lim <jintack@cs.columbia.edu> Cc: Jintack Lim <jintack@cs.columbia.edu> Signed-off-by: Jason Wang <jasowang@redhat.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/vhost/net.c3
-rw-r--r--drivers/vhost/vhost.c97
-rw-r--r--drivers/vhost/vhost.h3
3 files changed, 87 insertions, 16 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 36f3d0f49e60..bca86bf7189f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1236,7 +1236,8 @@ static void handle_rx(struct vhost_net *net)
1236 if (nvq->done_idx > VHOST_NET_BATCH) 1236 if (nvq->done_idx > VHOST_NET_BATCH)
1237 vhost_net_signal_used(nvq); 1237 vhost_net_signal_used(nvq);
1238 if (unlikely(vq_log)) 1238 if (unlikely(vq_log))
1239 vhost_log_write(vq, vq_log, log, vhost_len); 1239 vhost_log_write(vq, vq_log, log, vhost_len,
1240 vq->iov, in);
1240 total_len += vhost_len; 1241 total_len += vhost_len;
1241 if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { 1242 if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
1242 vhost_poll_queue(&vq->poll); 1243 vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9f7942cbcbb2..babbb32b9bf0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1733,13 +1733,87 @@ static int log_write(void __user *log_base,
1733 return r; 1733 return r;
1734} 1734}
1735 1735
1736static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
1737{
1738 struct vhost_umem *umem = vq->umem;
1739 struct vhost_umem_node *u;
1740 u64 start, end, l, min;
1741 int r;
1742 bool hit = false;
1743
1744 while (len) {
1745 min = len;
1746 /* More than one GPAs can be mapped into a single HVA. So
1747 * iterate all possible umems here to be safe.
1748 */
1749 list_for_each_entry(u, &umem->umem_list, link) {
1750 if (u->userspace_addr > hva - 1 + len ||
1751 u->userspace_addr - 1 + u->size < hva)
1752 continue;
1753 start = max(u->userspace_addr, hva);
1754 end = min(u->userspace_addr - 1 + u->size,
1755 hva - 1 + len);
1756 l = end - start + 1;
1757 r = log_write(vq->log_base,
1758 u->start + start - u->userspace_addr,
1759 l);
1760 if (r < 0)
1761 return r;
1762 hit = true;
1763 min = min(l, min);
1764 }
1765
1766 if (!hit)
1767 return -EFAULT;
1768
1769 len -= min;
1770 hva += min;
1771 }
1772
1773 return 0;
1774}
1775
1776static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
1777{
1778 struct iovec iov[64];
1779 int i, ret;
1780
1781 if (!vq->iotlb)
1782 return log_write(vq->log_base, vq->log_addr + used_offset, len);
1783
1784 ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
1785 len, iov, 64, VHOST_ACCESS_WO);
1786 if (ret)
1787 return ret;
1788
1789 for (i = 0; i < ret; i++) {
1790 ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
1791 iov[i].iov_len);
1792 if (ret)
1793 return ret;
1794 }
1795
1796 return 0;
1797}
1798
1736int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 1799int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
1737 unsigned int log_num, u64 len) 1800 unsigned int log_num, u64 len, struct iovec *iov, int count)
1738{ 1801{
1739 int i, r; 1802 int i, r;
1740 1803
1741 /* Make sure data written is seen before log. */ 1804 /* Make sure data written is seen before log. */
1742 smp_wmb(); 1805 smp_wmb();
1806
1807 if (vq->iotlb) {
1808 for (i = 0; i < count; i++) {
1809 r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
1810 iov[i].iov_len);
1811 if (r < 0)
1812 return r;
1813 }
1814 return 0;
1815 }
1816
1743 for (i = 0; i < log_num; ++i) { 1817 for (i = 0; i < log_num; ++i) {
1744 u64 l = min(log[i].len, len); 1818 u64 l = min(log[i].len, len);
1745 r = log_write(vq->log_base, log[i].addr, l); 1819 r = log_write(vq->log_base, log[i].addr, l);
@@ -1769,9 +1843,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq)
1769 smp_wmb(); 1843 smp_wmb();
1770 /* Log used flag write. */ 1844 /* Log used flag write. */
1771 used = &vq->used->flags; 1845 used = &vq->used->flags;
1772 log_write(vq->log_base, vq->log_addr + 1846 log_used(vq, (used - (void __user *)vq->used),
1773 (used - (void __user *)vq->used), 1847 sizeof vq->used->flags);
1774 sizeof vq->used->flags);
1775 if (vq->log_ctx) 1848 if (vq->log_ctx)
1776 eventfd_signal(vq->log_ctx, 1); 1849 eventfd_signal(vq->log_ctx, 1);
1777 } 1850 }
@@ -1789,9 +1862,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
1789 smp_wmb(); 1862 smp_wmb();
1790 /* Log avail event write */ 1863 /* Log avail event write */
1791 used = vhost_avail_event(vq); 1864 used = vhost_avail_event(vq);
1792 log_write(vq->log_base, vq->log_addr + 1865 log_used(vq, (used - (void __user *)vq->used),
1793 (used - (void __user *)vq->used), 1866 sizeof *vhost_avail_event(vq));
1794 sizeof *vhost_avail_event(vq));
1795 if (vq->log_ctx) 1867 if (vq->log_ctx)
1796 eventfd_signal(vq->log_ctx, 1); 1868 eventfd_signal(vq->log_ctx, 1);
1797 } 1869 }
@@ -2191,10 +2263,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
2191 /* Make sure data is seen before log. */ 2263 /* Make sure data is seen before log. */
2192 smp_wmb(); 2264 smp_wmb();
2193 /* Log used ring entry write. */ 2265 /* Log used ring entry write. */
2194 log_write(vq->log_base, 2266 log_used(vq, ((void __user *)used - (void __user *)vq->used),
2195 vq->log_addr + 2267 count * sizeof *used);
2196 ((void __user *)used - (void __user *)vq->used),
2197 count * sizeof *used);
2198 } 2268 }
2199 old = vq->last_used_idx; 2269 old = vq->last_used_idx;
2200 new = (vq->last_used_idx += count); 2270 new = (vq->last_used_idx += count);
@@ -2236,9 +2306,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
2236 /* Make sure used idx is seen before log. */ 2306 /* Make sure used idx is seen before log. */
2237 smp_wmb(); 2307 smp_wmb();
2238 /* Log used index update. */ 2308 /* Log used index update. */
2239 log_write(vq->log_base, 2309 log_used(vq, offsetof(struct vring_used, idx),
2240 vq->log_addr + offsetof(struct vring_used, idx), 2310 sizeof vq->used->idx);
2241 sizeof vq->used->idx);
2242 if (vq->log_ctx) 2311 if (vq->log_ctx)
2243 eventfd_signal(vq->log_ctx, 1); 2312 eventfd_signal(vq->log_ctx, 1);
2244 } 2313 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 466ef7542291..1b675dad5e05 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -205,7 +205,8 @@ bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
205bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); 205bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
206 206
207int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 207int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
208 unsigned int log_num, u64 len); 208 unsigned int log_num, u64 len,
209 struct iovec *iov, int count);
209int vq_iotlb_prefetch(struct vhost_virtqueue *vq); 210int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
210 211
211struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); 212struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);