summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dexuan Cui <decui@microsoft.com> 2019-08-22 01:05:37 -0400
committer: David S. Miller <davem@davemloft.net> 2019-08-22 03:25:12 -0400
commit: e5d2f910cfeca852f6e2dc19dfa8dab264ce0cde (patch)
tree: 7874871750a7fadd2d1800ade6b95ef4f423ac35
parent: fed07ef3b072ff5b420de954697c529e4bed73e2 (diff)
PCI: hv: Add a paravirtual backchannel in software
Windows SR-IOV provides a backchannel mechanism in software for communication between a VF driver and a PF driver. These "configuration blocks" are similar in concept to PCI configuration space, but instead of doing reads and writes in 32-bit chunks through a very slow path, packets of up to 128 bytes can be sent or received asynchronously. Nearly every SR-IOV device contains just such a communications channel in hardware, so using this one in software is usually optional. Using the software channel, however, allows driver implementers to leverage software tools that fuzz the communications channel looking for vulnerabilities. The usage model for these packets puts the responsibility for reading or writing on the VF driver. The VF driver sends a read or a write packet, indicating which "block" is being referred to by number. If the PF driver wishes to initiate communication, it can "invalidate" one or more of the first 64 blocks. This driver delivers the invalidation via a callback supplied by the VF driver. No protocol is implied, except that supplied by the PF and VF drivers. Signed-off-by: Jake Oshins <jakeo@microsoft.com> Signed-off-by: Dexuan Cui <decui@microsoft.com> Cc: Haiyang Zhang <haiyangz@microsoft.com> Cc: K. Y. Srinivasan <kys@microsoft.com> Cc: Stephen Hemminger <sthemmin@microsoft.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- drivers/pci/controller/pci-hyperv.c 302
-rw-r--r-- include/linux/hyperv.h 15
2 files changed, 317 insertions, 0 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b625458afa..57adeca7bda9 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
365 struct tran_int_desc int_desc; 365 struct tran_int_desc int_desc;
366} __packed; 366} __packed;
367 367
368/*
369 * Note: the VM must pass a valid block id, wslot and bytes_requested.
370 */
371struct pci_read_block {
372 struct pci_message message_type; /* .type is set to PCI_READ_BLOCK by the sender */
373 u32 block_id; /* config block being requested */
374 union win_slot_encoding wslot; /* .slot is filled from devfn_to_wslot() */
375 u32 bytes_requested; /* number of bytes the requester asks for */
376} __packed;
377
378struct pci_read_block_response { /* reply handled by hv_pci_read_config_compl() */
379 struct vmpacket_descriptor hdr;
380 u32 status; /* zero means the read succeeded */
381 u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; /* returned block data */
382} __packed;
383
384/*
385 * Note: the VM must pass a valid block id, wslot and byte_count.
386 */
387struct pci_write_block {
388 struct pci_message message_type; /* .type is set to PCI_WRITE_BLOCK by the sender */
389 u32 block_id; /* config block being written */
390 union win_slot_encoding wslot; /* .slot is filled from devfn_to_wslot() */
391 u32 byte_count; /* number of valid bytes in bytes[] */
392 u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX]; /* data to write */
393} __packed;
394
395struct pci_dev_inval_block { /* layout of an incoming PCI_INVALIDATE_BLOCK message */
396 struct pci_incoming_message incoming;
397 union win_slot_encoding wslot; /* .slot selects the child device to notify */
398 u64 block_mask; /* passed unchanged to the registered block_invalidate callback */
399} __packed;
400
368struct pci_dev_incoming { 401struct pci_dev_incoming {
369 struct pci_incoming_message incoming; 402 struct pci_incoming_message incoming;
370 union win_slot_encoding wslot; 403 union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
499 struct hv_pcibus_device *hbus; 532 struct hv_pcibus_device *hbus;
500 struct work_struct wrk; 533 struct work_struct wrk;
501 534
535 void (*block_invalidate)(void *context, u64 block_mask);
536 void *invalidate_context;
537
502 /* 538 /*
503 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then 539 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
504 * read it back, for each of the BAR offsets within config space. 540 * read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static struct pci_ops hv_pcifront_ops = {
817 .write = hv_pcifront_write_config, 853 .write = hv_pcifront_write_config,
818}; 854};
819 855
856/*
857 * Paravirtual backchannel
858 *
859 * Hyper-V SR-IOV provides a backchannel mechanism in software for
860 * communication between a VF driver and a PF driver. These
861 * "configuration blocks" are similar in concept to PCI configuration space,
862 * but instead of doing reads and writes in 32-bit chunks through a very slow
863 * path, packets of up to 128 bytes can be sent or received asynchronously.
864 *
865 * Nearly every SR-IOV device contains just such a communications channel in
866 * hardware, so using this one in software is usually optional. Using the
867 * software channel, however, allows driver implementers to leverage software
868 * tools that fuzz the communications channel looking for vulnerabilities.
869 *
870 * The usage model for these packets puts the responsibility for reading or
871 * writing on the VF driver. The VF driver sends a read or a write packet,
872 * indicating which "block" is being referred to by number.
873 *
874 * If the PF driver wishes to initiate communication, it can "invalidate" one or
875 * more of the first 64 blocks. This driver delivers the invalidation via a
876 * callback supplied by the VF driver.
877 *
878 * No protocol is implied, except that supplied by the PF and VF drivers.
879 */
880
881struct hv_read_config_compl { /* completion context for one read config block request */
882 struct hv_pci_compl comp_pkt; /* completion event plus the status reported back */
883 void *buf; /* destination the returned block data is copied into */
884 unsigned int len; /* capacity of buf in bytes */
885 unsigned int bytes_returned; /* number of bytes actually copied into buf */
886};
887
888/**
889 * hv_pci_read_config_compl() - Invoked when a response packet
890 * for a read config block operation arrives.
891 * @context: The struct hv_read_config_compl tracking this read
892 * @resp: The response packet itself (a struct pci_read_block_response)
893 * @resp_packet_size: Size in bytes of the response packet
894 */
895static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
896 int resp_packet_size)
897{
898 struct hv_read_config_compl *comp = context;
899 struct pci_read_block_response *read_resp =
900 (struct pci_read_block_response *)resp;
901 unsigned int data_len, hdr_len;
902
903 hdr_len = offsetof(struct pci_read_block_response, bytes); /* everything before the payload */
904 if (resp_packet_size < hdr_len) { /* NOTE(review): signed int compared against unsigned int */
905 comp->comp_pkt.completion_status = -1; /* NOTE(review): bytes_returned stays unset on this path, yet the caller logs it */
906 goto out;
907 }
908
909 data_len = resp_packet_size - hdr_len; /* payload bytes present in the reply */
910 if (data_len > 0 && read_resp->status == 0) {
911 comp->bytes_returned = min(comp->len, data_len); /* never copy more than buf can hold */
912 memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
913 } else {
914 comp->bytes_returned = 0; /* failed or empty reply: nothing copied */
915 }
916
917 comp->comp_pkt.completion_status = read_resp->status;
918out:
919 complete(&comp->comp_pkt.host_event); /* signal the completion the requester waits on */
920}
921
922/**
923 * hv_read_config_block() - Sends a read config block request to
924 * the back-end driver running in the Hyper-V parent partition.
925 * @pdev: The PCI driver's representation for this device.
926 * @buf: Buffer into which the config block will be copied.
927 * @len: Size in bytes of buf; must be 1..HV_CONFIG_BLOCK_SIZE_MAX.
928 * @block_id: Identifies the config block which has been requested.
929 * @bytes_returned: Written only on success: number of bytes copied into buf.
930 *
931 * Return: 0 on success, -errno on failure (-EINVAL: bad len, -EIO: failed or empty read)
932 */
933int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
934 unsigned int block_id, unsigned int *bytes_returned)
935{
936 struct hv_pcibus_device *hbus =
937 container_of(pdev->bus->sysdata, struct hv_pcibus_device,
938 sysdata);
939 struct {
940 struct pci_packet pkt;
941 char buf[sizeof(struct pci_read_block)];
942 } pkt;
943 struct hv_read_config_compl comp_pkt;
944 struct pci_read_block *read_blk;
945 int ret;
946
947 if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) /* reject empty or oversized requests up front */
948 return -EINVAL;
949
950 init_completion(&comp_pkt.comp_pkt.host_event);
951 comp_pkt.buf = buf;
952 comp_pkt.len = len;
953
954 memset(&pkt, 0, sizeof(pkt));
955 pkt.pkt.completion_func = hv_pci_read_config_compl;
956 pkt.pkt.compl_ctxt = &comp_pkt;
957 read_blk = (struct pci_read_block *)&pkt.pkt.message;
958 read_blk->message_type.type = PCI_READ_BLOCK;
959 read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
960 read_blk->block_id = block_id;
961 read_blk->bytes_requested = len;
962
963 ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
964 sizeof(*read_blk), (unsigned long)&pkt.pkt,
965 VM_PKT_DATA_INBAND,
966 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
967 if (ret)
968 return ret;
969
970 ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
971 if (ret)
972 return ret;
973
974 if (comp_pkt.comp_pkt.completion_status != 0 || /* a zero-byte reply is also treated as failure */
975 comp_pkt.bytes_returned == 0) {
976 dev_err(&hbus->hdev->device,
977 "Read Config Block failed: 0x%x, bytes_returned=%d\n",
978 comp_pkt.comp_pkt.completion_status,
979 comp_pkt.bytes_returned);
980 return -EIO;
981 }
982
983 *bytes_returned = comp_pkt.bytes_returned;
984 return 0;
985}
986EXPORT_SYMBOL(hv_read_config_block);
987
988/**
989 * hv_pci_write_config_compl() - Invoked when a response packet for a write
990 * config block operation arrives.
991 * @context: The struct hv_pci_compl tracking this write
992 * @resp: The response packet itself
993 * @resp_packet_size: Size in bytes of the response packet (not examined here)
994 */
995static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
996 int resp_packet_size)
997{
998 struct hv_pci_compl *comp_pkt = context;
999
1000 comp_pkt->completion_status = resp->status; /* hand the response status to the requester */
1001 complete(&comp_pkt->host_event); /* signal the completion the requester waits on */
1002}
1003
1004/**
1005 * hv_write_config_block() - Sends a write config block request to the
1006 * back-end driver running in the Hyper-V parent partition.
1007 * @pdev: The PCI driver's representation for this device.
1008 * @buf: Buffer from which the config block will be copied.
1009 * @len: Size in bytes of buf; must be 1..HV_CONFIG_BLOCK_SIZE_MAX.
1010 * @block_id: Identifies the config block which is being written.
1011 *
1012 * Return: 0 on success, -errno on failure (-EINVAL: bad len, -EIO: nonzero response status)
1013 */
1014int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
1015 unsigned int block_id)
1016{
1017 struct hv_pcibus_device *hbus =
1018 container_of(pdev->bus->sysdata, struct hv_pcibus_device,
1019 sysdata);
1020 struct {
1021 struct pci_packet pkt;
1022 char buf[sizeof(struct pci_write_block)];
1023 u32 reserved;
1024 } pkt;
1025 struct hv_pci_compl comp_pkt;
1026 struct pci_write_block *write_blk;
1027 u32 pkt_size;
1028 int ret;
1029
1030 if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) /* reject empty or oversized writes up front */
1031 return -EINVAL;
1032
1033 init_completion(&comp_pkt.host_event);
1034
1035 memset(&pkt, 0, sizeof(pkt));
1036 pkt.pkt.completion_func = hv_pci_write_config_compl;
1037 pkt.pkt.compl_ctxt = &comp_pkt;
1038 write_blk = (struct pci_write_block *)&pkt.pkt.message;
1039 write_blk->message_type.type = PCI_WRITE_BLOCK;
1040 write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
1041 write_blk->block_id = block_id;
1042 write_blk->byte_count = len;
1043 memcpy(write_blk->bytes, buf, len);
1044 pkt_size = offsetof(struct pci_write_block, bytes) + len; /* header plus only the bytes in use */
1045 /*
1046 * This quirk is required on some hosts shipped around 2018, because
1047 * these hosts don't check the pkt_size correctly (new hosts have been
1048 * fixed since early 2019). The quirk is also safe on very old hosts
1049 * and new hosts, because, on them, what really matters is the length
1050 * specified in write_blk->byte_count.
1051 */
1052 pkt_size += sizeof(pkt.reserved);
1053
1054 ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
1055 (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
1056 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
1057 if (ret)
1058 return ret;
1059
1060 ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
1061 if (ret)
1062 return ret;
1063
1064 if (comp_pkt.completion_status != 0) {
1065 dev_err(&hbus->hdev->device,
1066 "Write Config Block failed: 0x%x\n",
1067 comp_pkt.completion_status);
1068 return -EIO;
1069 }
1070
1071 return 0;
1072}
1073EXPORT_SYMBOL(hv_write_config_block);
1074
1075/**
1076 * hv_register_block_invalidate() - Registers the callback to be invoked when
1077 * a config block invalidation arrives from the back-end driver.
1078 * @pdev: The PCI driver's representation for this device.
1079 * @context: Opaque value handed back as the first argument of the callback.
1080 * @block_invalidate: Callback that receives the mask of invalidated blocks.
1081 *
1082 * Return: 0 on success, -errno on failure (-ENODEV if no child device matches)
1083 */
1084int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
1085 void (*block_invalidate)(void *context,
1086 u64 block_mask))
1087{
1088 struct hv_pcibus_device *hbus =
1089 container_of(pdev->bus->sysdata, struct hv_pcibus_device,
1090 sysdata);
1091 struct hv_pci_dev *hpdev;
1092
1093 hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
1094 if (!hpdev)
1095 return -ENODEV;
1096
1097 hpdev->block_invalidate = block_invalidate; /* stored for the PCI_INVALIDATE_BLOCK handler */
1098 hpdev->invalidate_context = context;
1099
1100 put_pcichild(hpdev); /* pairs with get_pcichild_wslot() above */
1101 return 0;
1102
1103}
1104EXPORT_SYMBOL(hv_register_block_invalidate);
1105
820/* Interrupt management hooks */ 1106/* Interrupt management hooks */
821static void hv_int_desc_free(struct hv_pci_dev *hpdev, 1107static void hv_int_desc_free(struct hv_pci_dev *hpdev,
822 struct tran_int_desc *int_desc) 1108 struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
1968 struct pci_response *response; 2254 struct pci_response *response;
1969 struct pci_incoming_message *new_message; 2255 struct pci_incoming_message *new_message;
1970 struct pci_bus_relations *bus_rel; 2256 struct pci_bus_relations *bus_rel;
2257 struct pci_dev_inval_block *inval;
1971 struct pci_dev_incoming *dev_message; 2258 struct pci_dev_incoming *dev_message;
1972 struct hv_pci_dev *hpdev; 2259 struct hv_pci_dev *hpdev;
1973 2260
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
2045 } 2332 }
2046 break; 2333 break;
2047 2334
2335 case PCI_INVALIDATE_BLOCK:
2336
2337 inval = (struct pci_dev_inval_block *)buffer;
2338 hpdev = get_pcichild_wslot(hbus,
2339 inval->wslot.slot);
2340 if (hpdev) {
2341 if (hpdev->block_invalidate) {
2342 hpdev->block_invalidate(
2343 hpdev->invalidate_context,
2344 inval->block_mask);
2345 }
2346 put_pcichild(hpdev);
2347 }
2348 break;
2349
2048 default: 2350 default:
2049 dev_warn(&hbus->hdev->device, 2351 dev_warn(&hbus->hdev->device,
2050 "Unimplemented protocol message %x\n", 2352 "Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc34c4a6..9d37f8cf1245 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ hv_pkt_iter_next(struct vmbus_channel *channel,
1578 for (pkt = hv_pkt_iter_first(channel); pkt; \ 1578 for (pkt = hv_pkt_iter_first(channel); pkt; \
1579 pkt = hv_pkt_iter_next(channel, pkt)) 1579 pkt = hv_pkt_iter_next(channel, pkt))
1580 1580
1581/*
1582 * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
1583 * sends requests to read and write blocks. Each block must be no larger than
1584 * HV_CONFIG_BLOCK_SIZE_MAX bytes. Optionally, the VF driver can register a
1585 * callback function which will be invoked when the host says that one or more
1586 * of the first 64 block IDs is "invalid", meaning the VF driver should reread them.
1587 */
1588#define HV_CONFIG_BLOCK_SIZE_MAX 128
1589int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
1590 unsigned int block_id, unsigned int *bytes_returned);
1591int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
1592 unsigned int block_id);
1593int hv_register_block_invalidate(struct pci_dev *dev, void *context,
1594 void (*block_invalidate)(void *context,
1595 u64 block_mask));
1581#endif /* _HYPERV_H */ 1596#endif /* _HYPERV_H */