aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/vhost/scsi.c426
-rw-r--r--drivers/virtio/virtio_balloon.c374
-rw-r--r--include/uapi/linux/virtio_balloon.h8
-rw-r--r--kernel/configs/kvm_guest.config1
-rw-r--r--mm/page_poison.c6
6 files changed, 685 insertions, 131 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 690c2f68a401..bb97067d0568 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15858,7 +15858,6 @@ F: net/vmw_vsock/virtio_transport_common.c
15858F: net/vmw_vsock/virtio_transport.c 15858F: net/vmw_vsock/virtio_transport.c
15859F: drivers/net/vsockmon.c 15859F: drivers/net/vsockmon.c
15860F: drivers/vhost/vsock.c 15860F: drivers/vhost/vsock.c
15861F: drivers/vhost/vsock.h
15862F: tools/testing/vsock/ 15861F: tools/testing/vsock/
15863 15862
15864VIRTIO CONSOLE DRIVER 15863VIRTIO CONSOLE DRIVER
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c24bb690680b..50dffe83714c 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -203,6 +203,19 @@ struct vhost_scsi {
203 int vs_events_nr; /* num of pending events, protected by vq->mutex */ 203 int vs_events_nr; /* num of pending events, protected by vq->mutex */
204}; 204};
205 205
206/*
207 * Context for processing request and control queue operations.
208 */
209struct vhost_scsi_ctx {
210 int head;
211 unsigned int out, in;
212 size_t req_size, rsp_size;
213 size_t out_size, in_size;
214 u8 *target, *lunp;
215 void *req;
216 struct iov_iter out_iter;
217};
218
206static struct workqueue_struct *vhost_scsi_workqueue; 219static struct workqueue_struct *vhost_scsi_workqueue;
207 220
208/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ 221/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -800,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
800 pr_err("Faulted on virtio_scsi_cmd_resp\n"); 813 pr_err("Faulted on virtio_scsi_cmd_resp\n");
801} 814}
802 815
816static int
817vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
818 struct vhost_scsi_ctx *vc)
819{
820 int ret = -ENXIO;
821
822 vc->head = vhost_get_vq_desc(vq, vq->iov,
823 ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
824 NULL, NULL);
825
826 pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
827 vc->head, vc->out, vc->in);
828
829 /* On error, stop handling until the next kick. */
830 if (unlikely(vc->head < 0))
831 goto done;
832
833 /* Nothing new? Wait for eventfd to tell us they refilled. */
834 if (vc->head == vq->num) {
835 if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
836 vhost_disable_notify(&vs->dev, vq);
837 ret = -EAGAIN;
838 }
839 goto done;
840 }
841
842 /*
843 * Get the size of request and response buffers.
844 * FIXME: Not correct for BIDI operation
845 */
846 vc->out_size = iov_length(vq->iov, vc->out);
847 vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
848
849 /*
850 * Copy over the virtio-scsi request header, which for a
851 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
852 * single iovec may contain both the header + outgoing
853 * WRITE payloads.
854 *
855 * copy_from_iter() will advance out_iter, so that it will
856 * point at the start of the outgoing WRITE payload, if
857 * DMA_TO_DEVICE is set.
858 */
859 iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
860 ret = 0;
861
862done:
863 return ret;
864}
865
866static int
867vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
868{
869 if (unlikely(vc->in_size < vc->rsp_size)) {
870 vq_err(vq,
871 "Response buf too small, need min %zu bytes got %zu",
872 vc->rsp_size, vc->in_size);
873 return -EINVAL;
874 } else if (unlikely(vc->out_size < vc->req_size)) {
875 vq_err(vq,
876 "Request buf too small, need min %zu bytes got %zu",
877 vc->req_size, vc->out_size);
878 return -EIO;
879 }
880
881 return 0;
882}
883
884static int
885vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
886 struct vhost_scsi_tpg **tpgp)
887{
888 int ret = -EIO;
889
890 if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
891 &vc->out_iter))) {
892 vq_err(vq, "Faulted on copy_from_iter\n");
893 } else if (unlikely(*vc->lunp != 1)) {
894 /* virtio-scsi spec requires byte 0 of the lun to be 1 */
895 vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
896 } else {
897 struct vhost_scsi_tpg **vs_tpg, *tpg;
898
899 vs_tpg = vq->private_data; /* validated at handler entry */
900
901 tpg = READ_ONCE(vs_tpg[*vc->target]);
902 if (unlikely(!tpg)) {
903 vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
904 } else {
905 if (tpgp)
906 *tpgp = tpg;
907 ret = 0;
908 }
909 }
910
911 return ret;
912}
913
803static void 914static void
804vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) 915vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
805{ 916{
806 struct vhost_scsi_tpg **vs_tpg, *tpg; 917 struct vhost_scsi_tpg **vs_tpg, *tpg;
807 struct virtio_scsi_cmd_req v_req; 918 struct virtio_scsi_cmd_req v_req;
808 struct virtio_scsi_cmd_req_pi v_req_pi; 919 struct virtio_scsi_cmd_req_pi v_req_pi;
920 struct vhost_scsi_ctx vc;
809 struct vhost_scsi_cmd *cmd; 921 struct vhost_scsi_cmd *cmd;
810 struct iov_iter out_iter, in_iter, prot_iter, data_iter; 922 struct iov_iter in_iter, prot_iter, data_iter;
811 u64 tag; 923 u64 tag;
812 u32 exp_data_len, data_direction; 924 u32 exp_data_len, data_direction;
813 unsigned int out = 0, in = 0; 925 int ret, prot_bytes;
814 int head, ret, prot_bytes;
815 size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
816 size_t out_size, in_size;
817 u16 lun; 926 u16 lun;
818 u8 *target, *lunp, task_attr; 927 u8 task_attr;
819 bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI); 928 bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
820 void *req, *cdb; 929 void *cdb;
821 930
822 mutex_lock(&vq->mutex); 931 mutex_lock(&vq->mutex);
823 /* 932 /*
@@ -828,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
828 if (!vs_tpg) 937 if (!vs_tpg)
829 goto out; 938 goto out;
830 939
940 memset(&vc, 0, sizeof(vc));
941 vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp);
942
831 vhost_disable_notify(&vs->dev, vq); 943 vhost_disable_notify(&vs->dev, vq);
832 944
833 for (;;) { 945 for (;;) {
834 head = vhost_get_vq_desc(vq, vq->iov, 946 ret = vhost_scsi_get_desc(vs, vq, &vc);
835 ARRAY_SIZE(vq->iov), &out, &in, 947 if (ret)
836 NULL, NULL); 948 goto err;
837 pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n", 949
838 head, out, in);
839 /* On error, stop handling until the next kick. */
840 if (unlikely(head < 0))
841 break;
842 /* Nothing new? Wait for eventfd to tell us they refilled. */
843 if (head == vq->num) {
844 if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
845 vhost_disable_notify(&vs->dev, vq);
846 continue;
847 }
848 break;
849 }
850 /*
851 * Check for a sane response buffer so we can report early
852 * errors back to the guest.
853 */
854 if (unlikely(vq->iov[out].iov_len < rsp_size)) {
855 vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
856 " size, got %zu bytes\n", vq->iov[out].iov_len);
857 break;
858 }
859 /* 950 /*
860 * Setup pointers and values based upon different virtio-scsi 951 * Setup pointers and values based upon different virtio-scsi
861 * request header if T10_PI is enabled in KVM guest. 952 * request header if T10_PI is enabled in KVM guest.
862 */ 953 */
863 if (t10_pi) { 954 if (t10_pi) {
864 req = &v_req_pi; 955 vc.req = &v_req_pi;
865 req_size = sizeof(v_req_pi); 956 vc.req_size = sizeof(v_req_pi);
866 lunp = &v_req_pi.lun[0]; 957 vc.lunp = &v_req_pi.lun[0];
867 target = &v_req_pi.lun[1]; 958 vc.target = &v_req_pi.lun[1];
868 } else { 959 } else {
869 req = &v_req; 960 vc.req = &v_req;
870 req_size = sizeof(v_req); 961 vc.req_size = sizeof(v_req);
871 lunp = &v_req.lun[0]; 962 vc.lunp = &v_req.lun[0];
872 target = &v_req.lun[1]; 963 vc.target = &v_req.lun[1];
873 } 964 }
874 /*
875 * FIXME: Not correct for BIDI operation
876 */
877 out_size = iov_length(vq->iov, out);
878 in_size = iov_length(&vq->iov[out], in);
879 965
880 /* 966 /*
881 * Copy over the virtio-scsi request header, which for a 967 * Validate the size of request and response buffers.
882 * ANY_LAYOUT enabled guest may span multiple iovecs, or a 968 * Check for a sane response buffer so we can report
883 * single iovec may contain both the header + outgoing 969 * early errors back to the guest.
884 * WRITE payloads.
885 *
886 * copy_from_iter() will advance out_iter, so that it will
887 * point at the start of the outgoing WRITE payload, if
888 * DMA_TO_DEVICE is set.
889 */ 970 */
890 iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size); 971 ret = vhost_scsi_chk_size(vq, &vc);
972 if (ret)
973 goto err;
891 974
892 if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) { 975 ret = vhost_scsi_get_req(vq, &vc, &tpg);
893 vq_err(vq, "Faulted on copy_from_iter\n"); 976 if (ret)
894 vhost_scsi_send_bad_target(vs, vq, head, out); 977 goto err;
895 continue; 978
896 } 979 ret = -EIO; /* bad target on any error from here on */
897 /* virtio-scsi spec requires byte 0 of the lun to be 1 */
898 if (unlikely(*lunp != 1)) {
899 vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
900 vhost_scsi_send_bad_target(vs, vq, head, out);
901 continue;
902 }
903 980
904 tpg = READ_ONCE(vs_tpg[*target]);
905 if (unlikely(!tpg)) {
906 /* Target does not exist, fail the request */
907 vhost_scsi_send_bad_target(vs, vq, head, out);
908 continue;
909 }
910 /* 981 /*
911 * Determine data_direction by calculating the total outgoing 982 * Determine data_direction by calculating the total outgoing
912 * iovec sizes + incoming iovec sizes vs. virtio-scsi request + 983 * iovec sizes + incoming iovec sizes vs. virtio-scsi request +
@@ -924,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
924 */ 995 */
925 prot_bytes = 0; 996 prot_bytes = 0;
926 997
927 if (out_size > req_size) { 998 if (vc.out_size > vc.req_size) {
928 data_direction = DMA_TO_DEVICE; 999 data_direction = DMA_TO_DEVICE;
929 exp_data_len = out_size - req_size; 1000 exp_data_len = vc.out_size - vc.req_size;
930 data_iter = out_iter; 1001 data_iter = vc.out_iter;
931 } else if (in_size > rsp_size) { 1002 } else if (vc.in_size > vc.rsp_size) {
932 data_direction = DMA_FROM_DEVICE; 1003 data_direction = DMA_FROM_DEVICE;
933 exp_data_len = in_size - rsp_size; 1004 exp_data_len = vc.in_size - vc.rsp_size;
934 1005
935 iov_iter_init(&in_iter, READ, &vq->iov[out], in, 1006 iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in,
936 rsp_size + exp_data_len); 1007 vc.rsp_size + exp_data_len);
937 iov_iter_advance(&in_iter, rsp_size); 1008 iov_iter_advance(&in_iter, vc.rsp_size);
938 data_iter = in_iter; 1009 data_iter = in_iter;
939 } else { 1010 } else {
940 data_direction = DMA_NONE; 1011 data_direction = DMA_NONE;
@@ -950,21 +1021,20 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
950 if (data_direction != DMA_TO_DEVICE) { 1021 if (data_direction != DMA_TO_DEVICE) {
951 vq_err(vq, "Received non zero pi_bytesout," 1022 vq_err(vq, "Received non zero pi_bytesout,"
952 " but wrong data_direction\n"); 1023 " but wrong data_direction\n");
953 vhost_scsi_send_bad_target(vs, vq, head, out); 1024 goto err;
954 continue;
955 } 1025 }
956 prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout); 1026 prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
957 } else if (v_req_pi.pi_bytesin) { 1027 } else if (v_req_pi.pi_bytesin) {
958 if (data_direction != DMA_FROM_DEVICE) { 1028 if (data_direction != DMA_FROM_DEVICE) {
959 vq_err(vq, "Received non zero pi_bytesin," 1029 vq_err(vq, "Received non zero pi_bytesin,"
960 " but wrong data_direction\n"); 1030 " but wrong data_direction\n");
961 vhost_scsi_send_bad_target(vs, vq, head, out); 1031 goto err;
962 continue;
963 } 1032 }
964 prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin); 1033 prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
965 } 1034 }
966 /* 1035 /*
967 * Set prot_iter to data_iter, and advance past any 1036 * Set prot_iter to data_iter and truncate it to
1037 * prot_bytes, and advance data_iter past any
968 * preceding prot_bytes that may be present. 1038 * preceding prot_bytes that may be present.
969 * 1039 *
970 * Also fix up the exp_data_len to reflect only the 1040 * Also fix up the exp_data_len to reflect only the
@@ -973,6 +1043,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
973 if (prot_bytes) { 1043 if (prot_bytes) {
974 exp_data_len -= prot_bytes; 1044 exp_data_len -= prot_bytes;
975 prot_iter = data_iter; 1045 prot_iter = data_iter;
1046 iov_iter_truncate(&prot_iter, prot_bytes);
976 iov_iter_advance(&data_iter, prot_bytes); 1047 iov_iter_advance(&data_iter, prot_bytes);
977 } 1048 }
978 tag = vhost64_to_cpu(vq, v_req_pi.tag); 1049 tag = vhost64_to_cpu(vq, v_req_pi.tag);
@@ -996,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
996 vq_err(vq, "Received SCSI CDB with command_size: %d that" 1067 vq_err(vq, "Received SCSI CDB with command_size: %d that"
997 " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", 1068 " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
998 scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE); 1069 scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
999 vhost_scsi_send_bad_target(vs, vq, head, out); 1070 goto err;
1000 continue;
1001 } 1071 }
1002 cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr, 1072 cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
1003 exp_data_len + prot_bytes, 1073 exp_data_len + prot_bytes,
@@ -1005,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1005 if (IS_ERR(cmd)) { 1075 if (IS_ERR(cmd)) {
1006 vq_err(vq, "vhost_scsi_get_tag failed %ld\n", 1076 vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
1007 PTR_ERR(cmd)); 1077 PTR_ERR(cmd));
1008 vhost_scsi_send_bad_target(vs, vq, head, out); 1078 goto err;
1009 continue;
1010 } 1079 }
1011 cmd->tvc_vhost = vs; 1080 cmd->tvc_vhost = vs;
1012 cmd->tvc_vq = vq; 1081 cmd->tvc_vq = vq;
1013 cmd->tvc_resp_iov = vq->iov[out]; 1082 cmd->tvc_resp_iov = vq->iov[vc.out];
1014 cmd->tvc_in_iovs = in; 1083 cmd->tvc_in_iovs = vc.in;
1015 1084
1016 pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", 1085 pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
1017 cmd->tvc_cdb[0], cmd->tvc_lun); 1086 cmd->tvc_cdb[0], cmd->tvc_lun);
@@ -1019,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1019 " %d\n", cmd, exp_data_len, prot_bytes, data_direction); 1088 " %d\n", cmd, exp_data_len, prot_bytes, data_direction);
1020 1089
1021 if (data_direction != DMA_NONE) { 1090 if (data_direction != DMA_NONE) {
1022 ret = vhost_scsi_mapal(cmd, 1091 if (unlikely(vhost_scsi_mapal(cmd, prot_bytes,
1023 prot_bytes, &prot_iter, 1092 &prot_iter, exp_data_len,
1024 exp_data_len, &data_iter); 1093 &data_iter))) {
1025 if (unlikely(ret)) {
1026 vq_err(vq, "Failed to map iov to sgl\n"); 1094 vq_err(vq, "Failed to map iov to sgl\n");
1027 vhost_scsi_release_cmd(&cmd->tvc_se_cmd); 1095 vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
1028 vhost_scsi_send_bad_target(vs, vq, head, out); 1096 goto err;
1029 continue;
1030 } 1097 }
1031 } 1098 }
1032 /* 1099 /*
@@ -1034,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1034 * complete the virtio-scsi request in TCM callback context via 1101 * complete the virtio-scsi request in TCM callback context via
1035 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status() 1102 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
1036 */ 1103 */
1037 cmd->tvc_vq_desc = head; 1104 cmd->tvc_vq_desc = vc.head;
1038 /* 1105 /*
1039 * Dispatch cmd descriptor for cmwq execution in process 1106 * Dispatch cmd descriptor for cmwq execution in process
1040 * context provided by vhost_scsi_workqueue. This also ensures 1107 * context provided by vhost_scsi_workqueue. This also ensures
@@ -1043,6 +1110,166 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1043 */ 1110 */
1044 INIT_WORK(&cmd->work, vhost_scsi_submission_work); 1111 INIT_WORK(&cmd->work, vhost_scsi_submission_work);
1045 queue_work(vhost_scsi_workqueue, &cmd->work); 1112 queue_work(vhost_scsi_workqueue, &cmd->work);
1113 ret = 0;
1114err:
1115 /*
1116 * ENXIO: No more requests, or read error, wait for next kick
1117 * EINVAL: Invalid response buffer, drop the request
1118 * EIO: Respond with bad target
1119 * EAGAIN: Pending request
1120 */
1121 if (ret == -ENXIO)
1122 break;
1123 else if (ret == -EIO)
1124 vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
1125 }
1126out:
1127 mutex_unlock(&vq->mutex);
1128}
1129
1130static void
1131vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
1132 struct vhost_virtqueue *vq,
1133 struct vhost_scsi_ctx *vc)
1134{
1135 struct virtio_scsi_ctrl_tmf_resp __user *resp;
1136 struct virtio_scsi_ctrl_tmf_resp rsp;
1137 int ret;
1138
1139 pr_debug("%s\n", __func__);
1140 memset(&rsp, 0, sizeof(rsp));
1141 rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
1142 resp = vq->iov[vc->out].iov_base;
1143 ret = __copy_to_user(resp, &rsp, sizeof(rsp));
1144 if (!ret)
1145 vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
1146 else
1147 pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
1148}
1149
1150static void
1151vhost_scsi_send_an_resp(struct vhost_scsi *vs,
1152 struct vhost_virtqueue *vq,
1153 struct vhost_scsi_ctx *vc)
1154{
1155 struct virtio_scsi_ctrl_an_resp __user *resp;
1156 struct virtio_scsi_ctrl_an_resp rsp;
1157 int ret;
1158
1159 pr_debug("%s\n", __func__);
1160 memset(&rsp, 0, sizeof(rsp)); /* event_actual = 0 */
1161 rsp.response = VIRTIO_SCSI_S_OK;
1162 resp = vq->iov[vc->out].iov_base;
1163 ret = __copy_to_user(resp, &rsp, sizeof(rsp));
1164 if (!ret)
1165 vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
1166 else
1167 pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
1168}
1169
1170static void
1171vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
1172{
1173 union {
1174 __virtio32 type;
1175 struct virtio_scsi_ctrl_an_req an;
1176 struct virtio_scsi_ctrl_tmf_req tmf;
1177 } v_req;
1178 struct vhost_scsi_ctx vc;
1179 size_t typ_size;
1180 int ret;
1181
1182 mutex_lock(&vq->mutex);
1183 /*
1184 * We can handle the vq only after the endpoint is setup by calling the
1185 * VHOST_SCSI_SET_ENDPOINT ioctl.
1186 */
1187 if (!vq->private_data)
1188 goto out;
1189
1190 memset(&vc, 0, sizeof(vc));
1191
1192 vhost_disable_notify(&vs->dev, vq);
1193
1194 for (;;) {
1195 ret = vhost_scsi_get_desc(vs, vq, &vc);
1196 if (ret)
1197 goto err;
1198
1199 /*
1200 * Get the request type first in order to setup
1201 * other parameters dependent on the type.
1202 */
1203 vc.req = &v_req.type;
1204 typ_size = sizeof(v_req.type);
1205
1206 if (unlikely(!copy_from_iter_full(vc.req, typ_size,
1207 &vc.out_iter))) {
1208 vq_err(vq, "Faulted on copy_from_iter tmf type\n");
1209 /*
1210 * The size of the response buffer depends on the
1211 * request type and must be validated against it.
1212 * Since the request type is not known, don't send
1213 * a response.
1214 */
1215 continue;
1216 }
1217
1218 switch (v_req.type) {
1219 case VIRTIO_SCSI_T_TMF:
1220 vc.req = &v_req.tmf;
1221 vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
1222 vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
1223 vc.lunp = &v_req.tmf.lun[0];
1224 vc.target = &v_req.tmf.lun[1];
1225 break;
1226 case VIRTIO_SCSI_T_AN_QUERY:
1227 case VIRTIO_SCSI_T_AN_SUBSCRIBE:
1228 vc.req = &v_req.an;
1229 vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req);
1230 vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
1231 vc.lunp = &v_req.an.lun[0];
1232 vc.target = NULL;
1233 break;
1234 default:
1235 vq_err(vq, "Unknown control request %d", v_req.type);
1236 continue;
1237 }
1238
1239 /*
1240 * Validate the size of request and response buffers.
1241 * Check for a sane response buffer so we can report
1242 * early errors back to the guest.
1243 */
1244 ret = vhost_scsi_chk_size(vq, &vc);
1245 if (ret)
1246 goto err;
1247
1248 /*
1249 * Get the rest of the request now that its size is known.
1250 */
1251 vc.req += typ_size;
1252 vc.req_size -= typ_size;
1253
1254 ret = vhost_scsi_get_req(vq, &vc, NULL);
1255 if (ret)
1256 goto err;
1257
1258 if (v_req.type == VIRTIO_SCSI_T_TMF)
1259 vhost_scsi_send_tmf_reject(vs, vq, &vc);
1260 else
1261 vhost_scsi_send_an_resp(vs, vq, &vc);
1262err:
1263 /*
1264 * ENXIO: No more requests, or read error, wait for next kick
1265 * EINVAL: Invalid response buffer, drop the request
1266 * EIO: Respond with bad target
1267 * EAGAIN: Pending request
1268 */
1269 if (ret == -ENXIO)
1270 break;
1271 else if (ret == -EIO)
1272 vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
1046 } 1273 }
1047out: 1274out:
1048 mutex_unlock(&vq->mutex); 1275 mutex_unlock(&vq->mutex);
@@ -1050,7 +1277,12 @@ out:
1050 1277
1051static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) 1278static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
1052{ 1279{
1280 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
1281 poll.work);
1282 struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
1283
1053 pr_debug("%s: The handling func for control queue.\n", __func__); 1284 pr_debug("%s: The handling func for control queue.\n", __func__);
1285 vhost_scsi_ctl_handle_vq(vs, vq);
1054} 1286}
1055 1287
1056static void 1288static void
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f6283729..728ecd1eea30 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -41,13 +41,34 @@
41#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 41#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
42#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 42#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
43 43
44#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
45 __GFP_NOMEMALLOC)
46/* The order of free page blocks to report to host */
47#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
48/* The size of a free page block in bytes */
49#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
50 (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
51
44#ifdef CONFIG_BALLOON_COMPACTION 52#ifdef CONFIG_BALLOON_COMPACTION
45static struct vfsmount *balloon_mnt; 53static struct vfsmount *balloon_mnt;
46#endif 54#endif
47 55
/* Indices into the per-device virtqueue arrays built by init_vqs(). */
enum virtio_balloon_vq {
	VIRTIO_BALLOON_VQ_INFLATE = 0,
	VIRTIO_BALLOON_VQ_DEFLATE,
	VIRTIO_BALLOON_VQ_STATS,
	VIRTIO_BALLOON_VQ_FREE_PAGE,
	VIRTIO_BALLOON_VQ_MAX	/* number of entries, not a queue */
};
63
48struct virtio_balloon { 64struct virtio_balloon {
49 struct virtio_device *vdev; 65 struct virtio_device *vdev;
50 struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; 66 struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
67
68 /* Balloon's own wq for cpu-intensive work items */
69 struct workqueue_struct *balloon_wq;
70 /* The free page reporting work item submitted to the balloon wq */
71 struct work_struct report_free_page_work;
51 72
52 /* The balloon servicing is delegated to a freezable workqueue. */ 73 /* The balloon servicing is delegated to a freezable workqueue. */
53 struct work_struct update_balloon_stats_work; 74 struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
57 spinlock_t stop_update_lock; 78 spinlock_t stop_update_lock;
58 bool stop_update; 79 bool stop_update;
59 80
81 /* The list of allocated free pages, waiting to be given back to mm */
82 struct list_head free_page_list;
83 spinlock_t free_page_list_lock;
84 /* The number of free page blocks on the above list */
85 unsigned long num_free_page_blocks;
86 /* The cmd id received from host */
87 u32 cmd_id_received;
88 /* The cmd id that is actively in use */
89 __virtio32 cmd_id_active;
90 /* Buffer to store the stop sign */
91 __virtio32 cmd_id_stop;
92
60 /* Waiting for host to ack the pages we released. */ 93 /* Waiting for host to ack the pages we released. */
61 wait_queue_head_t acked; 94 wait_queue_head_t acked;
62 95
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
320 virtqueue_kick(vq); 353 virtqueue_kick(vq);
321} 354}
322 355
323static void virtballoon_changed(struct virtio_device *vdev)
324{
325 struct virtio_balloon *vb = vdev->priv;
326 unsigned long flags;
327
328 spin_lock_irqsave(&vb->stop_update_lock, flags);
329 if (!vb->stop_update)
330 queue_work(system_freezable_wq, &vb->update_balloon_size_work);
331 spin_unlock_irqrestore(&vb->stop_update_lock, flags);
332}
333
334static inline s64 towards_target(struct virtio_balloon *vb) 356static inline s64 towards_target(struct virtio_balloon *vb)
335{ 357{
336 s64 target; 358 s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
347 return target - vb->num_pages; 369 return target - vb->num_pages;
348} 370}
349 371
372/* Gives back @num_to_return blocks of free pages to mm. */
373static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
374 unsigned long num_to_return)
375{
376 struct page *page;
377 unsigned long num_returned;
378
379 spin_lock_irq(&vb->free_page_list_lock);
380 for (num_returned = 0; num_returned < num_to_return; num_returned++) {
381 page = balloon_page_pop(&vb->free_page_list);
382 if (!page)
383 break;
384 free_pages((unsigned long)page_address(page),
385 VIRTIO_BALLOON_FREE_PAGE_ORDER);
386 }
387 vb->num_free_page_blocks -= num_returned;
388 spin_unlock_irq(&vb->free_page_list_lock);
389
390 return num_returned;
391}
392
393static void virtballoon_changed(struct virtio_device *vdev)
394{
395 struct virtio_balloon *vb = vdev->priv;
396 unsigned long flags;
397 s64 diff = towards_target(vb);
398
399 if (diff) {
400 spin_lock_irqsave(&vb->stop_update_lock, flags);
401 if (!vb->stop_update)
402 queue_work(system_freezable_wq,
403 &vb->update_balloon_size_work);
404 spin_unlock_irqrestore(&vb->stop_update_lock, flags);
405 }
406
407 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
408 virtio_cread(vdev, struct virtio_balloon_config,
409 free_page_report_cmd_id, &vb->cmd_id_received);
410 if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
411 /* Pass ULONG_MAX to give back all the free pages */
412 return_free_pages_to_mm(vb, ULONG_MAX);
413 } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
414 vb->cmd_id_received !=
415 virtio32_to_cpu(vdev, vb->cmd_id_active)) {
416 spin_lock_irqsave(&vb->stop_update_lock, flags);
417 if (!vb->stop_update) {
418 queue_work(vb->balloon_wq,
419 &vb->report_free_page_work);
420 }
421 spin_unlock_irqrestore(&vb->stop_update_lock, flags);
422 }
423 }
424}
425
350static void update_balloon_size(struct virtio_balloon *vb) 426static void update_balloon_size(struct virtio_balloon *vb)
351{ 427{
352 u32 actual = vb->num_pages; 428 u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
389 465
390static int init_vqs(struct virtio_balloon *vb) 466static int init_vqs(struct virtio_balloon *vb)
391{ 467{
392 struct virtqueue *vqs[3]; 468 struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
393 vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; 469 vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
394 static const char * const names[] = { "inflate", "deflate", "stats" }; 470 const char *names[VIRTIO_BALLOON_VQ_MAX];
395 int err, nvqs; 471 int err;
396 472
397 /* 473 /*
398 * We expect two virtqueues: inflate and deflate, and 474 * Inflateq and deflateq are used unconditionally. The names[]
399 * optionally stat. 475 * will be NULL if the related feature is not enabled, which will
476 * cause no allocation for the corresponding virtqueue in find_vqs.
400 */ 477 */
401 nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; 478 callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
402 err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); 479 names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
480 callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
481 names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
482 names[VIRTIO_BALLOON_VQ_STATS] = NULL;
483 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
484
485 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
486 names[VIRTIO_BALLOON_VQ_STATS] = "stats";
487 callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
488 }
489
490 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
491 names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
492 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
493 }
494
495 err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
496 vqs, callbacks, names, NULL, NULL);
403 if (err) 497 if (err)
404 return err; 498 return err;
405 499
406 vb->inflate_vq = vqs[0]; 500 vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
407 vb->deflate_vq = vqs[1]; 501 vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
408 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { 502 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
409 struct scatterlist sg; 503 struct scatterlist sg;
410 unsigned int num_stats; 504 unsigned int num_stats;
411 vb->stats_vq = vqs[2]; 505 vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
412 506
413 /* 507 /*
414 * Prime this virtqueue with one buffer so the hypervisor can 508 * Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
426 } 520 }
427 virtqueue_kick(vb->stats_vq); 521 virtqueue_kick(vb->stats_vq);
428 } 522 }
523
524 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
525 vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
526
527 return 0;
528}
529
530static int send_cmd_id_start(struct virtio_balloon *vb)
531{
532 struct scatterlist sg;
533 struct virtqueue *vq = vb->free_page_vq;
534 int err, unused;
535
536 /* Detach all the used buffers from the vq */
537 while (virtqueue_get_buf(vq, &unused))
538 ;
539
540 vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
541 sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
542 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
543 if (!err)
544 virtqueue_kick(vq);
545 return err;
546}
547
548static int send_cmd_id_stop(struct virtio_balloon *vb)
549{
550 struct scatterlist sg;
551 struct virtqueue *vq = vb->free_page_vq;
552 int err, unused;
553
554 /* Detach all the used buffers from the vq */
555 while (virtqueue_get_buf(vq, &unused))
556 ;
557
558 sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
559 err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
560 if (!err)
561 virtqueue_kick(vq);
562 return err;
563}
564
565static int get_free_page_and_send(struct virtio_balloon *vb)
566{
567 struct virtqueue *vq = vb->free_page_vq;
568 struct page *page;
569 struct scatterlist sg;
570 int err, unused;
571 void *p;
572
573 /* Detach all the used buffers from the vq */
574 while (virtqueue_get_buf(vq, &unused))
575 ;
576
577 page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
578 VIRTIO_BALLOON_FREE_PAGE_ORDER);
579 /*
580 * When the allocation returns NULL, it indicates that we have got all
581 * the possible free pages, so return -EINTR to stop.
582 */
583 if (!page)
584 return -EINTR;
585
586 p = page_address(page);
587 sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
588 /* There is always 1 entry reserved for the cmd id to use. */
589 if (vq->num_free > 1) {
590 err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
591 if (unlikely(err)) {
592 free_pages((unsigned long)p,
593 VIRTIO_BALLOON_FREE_PAGE_ORDER);
594 return err;
595 }
596 virtqueue_kick(vq);
597 spin_lock_irq(&vb->free_page_list_lock);
598 balloon_page_push(&vb->free_page_list, page);
599 vb->num_free_page_blocks++;
600 spin_unlock_irq(&vb->free_page_list_lock);
601 } else {
602 /*
603 * The vq has no available entry to add this page block, so
604 * just free it.
605 */
606 free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
607 }
608
609 return 0;
610}
611
612static int send_free_pages(struct virtio_balloon *vb)
613{
614 int err;
615 u32 cmd_id_active;
616
617 while (1) {
618 /*
619 * If a stop id or a new cmd id was just received from host,
620 * stop the reporting.
621 */
622 cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
623 if (cmd_id_active != vb->cmd_id_received)
624 break;
625
626 /*
627 * The free page blocks are allocated and sent to host one by
628 * one.
629 */
630 err = get_free_page_and_send(vb);
631 if (err == -EINTR)
632 break;
633 else if (unlikely(err))
634 return err;
635 }
636
429 return 0; 637 return 0;
430} 638}
431 639
640static void report_free_page_func(struct work_struct *work)
641{
642 int err;
643 struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
644 report_free_page_work);
645 struct device *dev = &vb->vdev->dev;
646
647 /* Start by sending the received cmd id to host with an outbuf. */
648 err = send_cmd_id_start(vb);
649 if (unlikely(err))
650 dev_err(dev, "Failed to send a start id, err = %d\n", err);
651
652 err = send_free_pages(vb);
653 if (unlikely(err))
654 dev_err(dev, "Failed to send a free page, err = %d\n", err);
655
656 /* End by sending a stop id to host with an outbuf. */
657 err = send_cmd_id_stop(vb);
658 if (unlikely(err))
659 dev_err(dev, "Failed to send a stop id, err = %d\n", err);
660}
661
432#ifdef CONFIG_BALLOON_COMPACTION 662#ifdef CONFIG_BALLOON_COMPACTION
433/* 663/*
434 * virtballoon_migratepage - perform the balloon page migration on behalf of 664 * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = {
512 742
513#endif /* CONFIG_BALLOON_COMPACTION */ 743#endif /* CONFIG_BALLOON_COMPACTION */
514 744
515static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, 745static unsigned long shrink_free_pages(struct virtio_balloon *vb,
516 struct shrink_control *sc) 746 unsigned long pages_to_free)
517{ 747{
518 unsigned long pages_to_free, pages_freed = 0; 748 unsigned long blocks_to_free, blocks_freed;
519 struct virtio_balloon *vb = container_of(shrinker,
520 struct virtio_balloon, shrinker);
521 749
522 pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; 750 pages_to_free = round_up(pages_to_free,
751 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
752 blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
753 blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
754
755 return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
756}
757
758static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
759 unsigned long pages_to_free)
760{
761 unsigned long pages_freed = 0;
523 762
524 /* 763 /*
525 * One invocation of leak_balloon can deflate at most 764 * One invocation of leak_balloon can deflate at most
@@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
527 * multiple times to deflate pages till reaching pages_to_free. 766 * multiple times to deflate pages till reaching pages_to_free.
528 */ 767 */
529 while (vb->num_pages && pages_to_free) { 768 while (vb->num_pages && pages_to_free) {
769 pages_freed += leak_balloon(vb, pages_to_free) /
770 VIRTIO_BALLOON_PAGES_PER_PAGE;
530 pages_to_free -= pages_freed; 771 pages_to_free -= pages_freed;
531 pages_freed += leak_balloon(vb, pages_to_free);
532 } 772 }
533 update_balloon_size(vb); 773 update_balloon_size(vb);
534 774
535 return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; 775 return pages_freed;
776}
777
778static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
779 struct shrink_control *sc)
780{
781 unsigned long pages_to_free, pages_freed = 0;
782 struct virtio_balloon *vb = container_of(shrinker,
783 struct virtio_balloon, shrinker);
784
785 pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
786
787 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
788 pages_freed = shrink_free_pages(vb, pages_to_free);
789
790 if (pages_freed >= pages_to_free)
791 return pages_freed;
792
793 pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
794
795 return pages_freed;
536} 796}
537 797
538static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, 798static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
540{ 800{
541 struct virtio_balloon *vb = container_of(shrinker, 801 struct virtio_balloon *vb = container_of(shrinker,
542 struct virtio_balloon, shrinker); 802 struct virtio_balloon, shrinker);
803 unsigned long count;
543 804
544 return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; 805 count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
806 count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
807
808 return count;
545} 809}
546 810
547static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) 811static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -561,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
561static int virtballoon_probe(struct virtio_device *vdev) 825static int virtballoon_probe(struct virtio_device *vdev)
562{ 826{
563 struct virtio_balloon *vb; 827 struct virtio_balloon *vb;
828 __u32 poison_val;
564 int err; 829 int err;
565 830
566 if (!vdev->config->get) { 831 if (!vdev->config->get) {
@@ -604,6 +869,36 @@ static int virtballoon_probe(struct virtio_device *vdev)
604 } 869 }
605 vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; 870 vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
606#endif 871#endif
872 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
873 /*
874 * There is always one entry reserved for cmd id, so the ring
875 * size needs to be at least two to report free page hints.
876 */
877 if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
878 err = -ENOSPC;
879 goto out_del_vqs;
880 }
881 vb->balloon_wq = alloc_workqueue("balloon-wq",
882 WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
883 if (!vb->balloon_wq) {
884 err = -ENOMEM;
885 goto out_del_vqs;
886 }
887 INIT_WORK(&vb->report_free_page_work, report_free_page_func);
888 vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
889 vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
890 VIRTIO_BALLOON_CMD_ID_STOP);
891 vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
892 VIRTIO_BALLOON_CMD_ID_STOP);
893 vb->num_free_page_blocks = 0;
894 spin_lock_init(&vb->free_page_list_lock);
895 INIT_LIST_HEAD(&vb->free_page_list);
896 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
897 memset(&poison_val, PAGE_POISON, sizeof(poison_val));
898 virtio_cwrite(vb->vdev, struct virtio_balloon_config,
899 poison_val, &poison_val);
900 }
901 }
607 /* 902 /*
608 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a 903 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
609 * shrinker needs to be registered to relieve memory pressure. 904 * shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +906,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
611 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { 906 if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
612 err = virtio_balloon_register_shrinker(vb); 907 err = virtio_balloon_register_shrinker(vb);
613 if (err) 908 if (err)
614 goto out_del_vqs; 909 goto out_del_balloon_wq;
615 } 910 }
616 virtio_device_ready(vdev); 911 virtio_device_ready(vdev);
617 912
@@ -619,6 +914,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
619 virtballoon_changed(vdev); 914 virtballoon_changed(vdev);
620 return 0; 915 return 0;
621 916
917out_del_balloon_wq:
918 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
919 destroy_workqueue(vb->balloon_wq);
622out_del_vqs: 920out_del_vqs:
623 vdev->config->del_vqs(vdev); 921 vdev->config->del_vqs(vdev);
624out_free_vb: 922out_free_vb:
@@ -652,6 +950,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
652 cancel_work_sync(&vb->update_balloon_size_work); 950 cancel_work_sync(&vb->update_balloon_size_work);
653 cancel_work_sync(&vb->update_balloon_stats_work); 951 cancel_work_sync(&vb->update_balloon_stats_work);
654 952
953 if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
954 cancel_work_sync(&vb->report_free_page_work);
955 destroy_workqueue(vb->balloon_wq);
956 }
957
655 remove_common(vb); 958 remove_common(vb);
656#ifdef CONFIG_BALLOON_COMPACTION 959#ifdef CONFIG_BALLOON_COMPACTION
657 if (vb->vb_dev_info.inode) 960 if (vb->vb_dev_info.inode)
@@ -695,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
695 998
696static int virtballoon_validate(struct virtio_device *vdev) 999static int virtballoon_validate(struct virtio_device *vdev)
697{ 1000{
1001 if (!page_poisoning_enabled())
1002 __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
1003
698 __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); 1004 __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
699 return 0; 1005 return 0;
700} 1006}
@@ -703,6 +1009,8 @@ static unsigned int features[] = {
703 VIRTIO_BALLOON_F_MUST_TELL_HOST, 1009 VIRTIO_BALLOON_F_MUST_TELL_HOST,
704 VIRTIO_BALLOON_F_STATS_VQ, 1010 VIRTIO_BALLOON_F_STATS_VQ,
705 VIRTIO_BALLOON_F_DEFLATE_ON_OOM, 1011 VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
1012 VIRTIO_BALLOON_F_FREE_PAGE_HINT,
1013 VIRTIO_BALLOON_F_PAGE_POISON,
706}; 1014};
707 1015
708static struct virtio_driver virtio_balloon_driver = { 1016static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb563892..a1966cd7b677 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,23 @@
34#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ 34#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
35#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ 35#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
36#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ 36#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
37#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
38#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */
37 39
38/* Size of a PFN in the balloon interface. */ 40/* Size of a PFN in the balloon interface. */
39#define VIRTIO_BALLOON_PFN_SHIFT 12 41#define VIRTIO_BALLOON_PFN_SHIFT 12
40 42
43#define VIRTIO_BALLOON_CMD_ID_STOP 0
44#define VIRTIO_BALLOON_CMD_ID_DONE 1
41struct virtio_balloon_config { 45struct virtio_balloon_config {
42 /* Number of pages host wants Guest to give up. */ 46 /* Number of pages host wants Guest to give up. */
43 __u32 num_pages; 47 __u32 num_pages;
44 /* Number of pages we've actually got in balloon. */ 48 /* Number of pages we've actually got in balloon. */
45 __u32 actual; 49 __u32 actual;
50 /* Free page report command id, readonly by guest */
51 __u32 free_page_report_cmd_id;
52 /* Stores PAGE_POISON if page poisoning is in use */
53 __u32 poison_val;
46}; 54};
47 55
48#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ 56#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
diff --git a/kernel/configs/kvm_guest.config b/kernel/configs/kvm_guest.config
index 108fecc20fc1..208481d91090 100644
--- a/kernel/configs/kvm_guest.config
+++ b/kernel/configs/kvm_guest.config
@@ -20,6 +20,7 @@ CONFIG_PARAVIRT=y
20CONFIG_KVM_GUEST=y 20CONFIG_KVM_GUEST=y
21CONFIG_S390_GUEST=y 21CONFIG_S390_GUEST=y
22CONFIG_VIRTIO=y 22CONFIG_VIRTIO=y
23CONFIG_VIRTIO_MENU=y
23CONFIG_VIRTIO_PCI=y 24CONFIG_VIRTIO_PCI=y
24CONFIG_VIRTIO_BLK=y 25CONFIG_VIRTIO_BLK=y
25CONFIG_VIRTIO_CONSOLE=y 26CONFIG_VIRTIO_CONSOLE=y
diff --git a/mm/page_poison.c b/mm/page_poison.c
index f7e2a676365a..f0c15e9017c0 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
17} 17}
18early_param("page_poison", early_page_poison_param); 18early_param("page_poison", early_page_poison_param);
19 19
20/**
21 * page_poisoning_enabled - check if page poisoning is enabled
22 *
23 * Return true if page poisoning is enabled, or false if not.
24 */
20bool page_poisoning_enabled(void) 25bool page_poisoning_enabled(void)
21{ 26{
22 /* 27 /*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
29 (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && 34 (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
30 debug_pagealloc_enabled())); 35 debug_pagealloc_enabled()));
31} 36}
37EXPORT_SYMBOL_GPL(page_poisoning_enabled);
32 38
33static void poison_page(struct page *page) 39static void poison_page(struct page *page)
34{ 40{