diff options
| author | Yashomati <ygodbole@nvidia.com> | 2019-05-31 21:59:52 -0400 |
|---|---|---|
| committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2019-12-24 14:56:43 -0500 |
| commit | 87dc30edda5936afa82b0afa821c8be2e44343c5 (patch) | |
| tree | e1f61e27e96e88880626426db82dbe21c85e6053 /include/linux | |
| parent | cda3f78dc40d0f21b1108a4087b6198fb53bde02 (diff) | |
inject-vm-err: handlers for injected errors
If Linux/EBP causes an error that HV can't handle,
then instead of freezing the guest, HV injects the
error back into the guest. This enables the guest
to handle the error as gracefully as it can/needs.
This changeset provides 2 parts:
1. sample handlers: minimal placeholder handlers that
just dump the error information on to the console. This
is to be used as a reference for any customized elaborate
error handling that may be needed.
2. library module: it comes into existence only if/when
any error handler is registered. Its main responsibilities:
- map memory that's shared with HV where HV dumps all
information about the errors.
- register handlers for interrupts used by HV to inject
errors
- invoke custom error handlers when HV injects error
JIRA ESV-312
Bug 2580803
Change-Id: Ia8c6484d423fd33cabbfd901f0f6ebb0da95cb40
Signed-off-by: Yashomati <ygodbole@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2214402
Reviewed-on: https://git-master.nvidia.com/r/2128765
GVS: Gerrit_Virtual_Submit
Reviewed-by: Dmitry Pervushin <dpervushin@nvidia.com>
Reviewed-by: Hardik T Shah <hardikts@nvidia.com>
Reviewed-by: Rohit Upadhyay <rupadhyay@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/errinfo.h | 124 | ||||
| -rw-r--r-- | include/linux/vm_err.h | 58 |
2 files changed, 182 insertions, 0 deletions
diff --git a/include/linux/errinfo.h b/include/linux/errinfo.h new file mode 100644 index 000000000..eca3a9bcb --- /dev/null +++ b/include/linux/errinfo.h | |||
| @@ -0,0 +1,124 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * NVIDIA CORPORATION and its licensors retain all intellectual property | ||
| 5 | * and proprietary rights in and to this software, related documentation | ||
| 6 | * and any modifications thereto. Any use, reproduction, disclosure or | ||
| 7 | * distribution of this software and related documentation without an express | ||
| 8 | * license agreement from NVIDIA CORPORATION is strictly prohibited. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #ifndef __INCLUDED_ERRINFO_H__ | ||
| 12 | #define __INCLUDED_ERRINFO_H__ | ||
| 13 | |||
/*
 * Reason attached to an error record (see struct errData).
 * Values are spelled out because they are positional: fault_reason_desc[]
 * in vm_err.h lists its descriptions in this same order.
 */
enum errReason {
	REASON_UNDEFINED		= 0,
	REASON_ASYNC_SMMU_CB		= 1,
	REASON_ASYNC_SMMU_GLOBAL	= 2,
	REASON_ASYNC_BRIDGE		= 3,
	REASON_ASYNC_MC			= 4,
	REASON_SYNC_INSTR_ABORT		= 5,
	REASON_SYNC_DATA_ABORT		= 6,
	REASON_SYNC_OTHER		= 7,
	/* Number of reasons above; must stay the last entry. */
	REASON_ENUM_SIZE		= 8
};
| 25 | |||
/* Class of an error record: synchronous or asynchronous. */
enum errType {
	SYNC	= 0,
	ASYNC	= 1
};
| 30 | |||
/*
 * Metadata placed at the start of the asynchronous error area of the
 * shared memory: the read and write indices into the async errors array.
 */
struct async_metaData {
	uint64_t rdIdx;		/* read index */
	uint64_t wrIdx;		/* write index */
} __attribute__((__packed__));
| 35 | |||
/* Capacity, in bytes, of the bridge name carried in struct async_bridgeErr. */
#define NAME_SIZE 64

/*
 * Details of an asynchronous bridge error (REASON_ASYNC_BRIDGE).
 * Packed: the field order and sizes are the layout of the record in the
 * memory shared with HV, so they must not be rearranged.
 */
struct async_bridgeErr {
	char br_name[NAME_SIZE];	/* bridge name */
	unsigned int err_addr;
	unsigned int err_status1;
	unsigned int err_status2;
	unsigned int rw;
	unsigned int err_type;
	unsigned int length;
	unsigned int br_id;
	unsigned int src_id;
	unsigned int axi_id;
	unsigned int count;
	unsigned int protection;
	unsigned int burst;
	unsigned int cache;
} __attribute__((__packed__));
| 54 | |||
/*
 * Details of an asynchronous SMMU error.
 * Packed record in the memory shared with HV; fixed-width types are used
 * throughout so the layout does not depend on the width of 'unsigned int'.
 * NOTE(review): fsynr0/fsynr1/far/fsr are presumably the SMMU fault
 * syndrome, fault address and fault status register values - confirm
 * against the HV side.
 */
struct async_smmuErr {
	uint32_t stream_id;
	uint32_t cb_id;
	uint32_t fsynr0;
	uint32_t fsynr1;
	uint64_t far;
	uint32_t fsr;
} __attribute__((__packed__));
| 63 | |||
/*
 * Details of an asynchronous memory controller error (REASON_ASYNC_MC).
 * Packed record in the memory shared with HV; fixed-width types are used
 * throughout so the layout does not depend on the width of 'unsigned int'.
 */
struct async_mcErr {
	uint64_t ch_base;
	uint32_t int_status;
	uint32_t err_status;
	uint64_t fault_addr;
	/* NOTE(review): 0xffffU appears to denote IDLE_vCPU_ID - confirm. */
	uint32_t vcpuid;
	uint32_t client_id;
	int32_t peripheral_id;
} __attribute__((__packed__));
| 73 | |||
/*
 * Details of a synchronous data abort; one such record exists per VCPU.
 * Packed record in the memory shared with HV; fixed-width types are used
 * for the integer fields so the layout does not depend on the width of
 * 'unsigned int'.
 */
struct sync_dataAbort {
	bool isFilled;			/* metadata field per VCpu */
	bool isWrite;
	uint8_t accessSize;
	uint32_t offendingVCpuId;
	uint32_t esrEl2;
	uint64_t faultAddr;
	uint64_t spsrEl2;
	uint64_t elrEl1;
	uint64_t gprArray[31];
} __attribute__((__packed__));
| 85 | |||
| 86 | struct __attribute__((__packed__)) errData { | ||
| 87 | unsigned int offendingGuestId; | ||
| 88 | enum errType errType; | ||
| 89 | enum errReason errReason; | ||
| 90 | union { | ||
| 91 | // *A*synchronous | ||
| 92 | struct async_bridgeErr async_bridgeErr; | ||
| 93 | struct async_smmuErr async_smmuErr; | ||
| 94 | struct async_mcErr async_mcErr; | ||
| 95 | // Synchronous | ||
| 96 | struct sync_dataAbort sync_dataAbort; | ||
| 97 | }; | ||
| 98 | }; | ||
| 99 | |||
| 100 | /* VM shared memory for error information is allocated contiguously to store | ||
| 101 | * Asynchronous(async) error information followed by the Synchronous(sync) | ||
| 102 | * error information. HV has write access and the VM has read access to this | ||
| 103 | * shared memory. The shared memory layout looks like: | ||
| 104 | * | ||
| 105 | * |--async-err-metadata--|--async-errors-array-|--sync-errors-array-| | ||
| 106 | * | ||
| 107 | * Size of async errors array = Max errors + 1 (the extra slot keeps the | ||
| 108 | * buffer-empty and buffer-full conditions distinguishable) | ||
| 109 | * Size of sync errors array = 1 error per VCPU * number of VCPUs on a VM | ||
| 110 | * | ||
| 111 | * So for a given VM, shared memory has: | ||
| 112 | * | ||
| 113 | * |--------ASyncErrInfo----------------|-------SyncErrInfo-------------------| | ||
| 114 | * |--------1bufferPerVM----------------|---VCpu0-buffer---|--VCpuN-buffer----| | ||
| 115 | * |---metaData----|---errData----------|-metaData+errData-|-metaData+errData-| | ||
| 116 | * |-rdIdx-|-wrIdx-|-Err1-|-Err2-|-ErrN-|-isFilled-|-Err1--|-isFilled-|-Err1--| | ||
| 117 | */ | ||
| 118 | |||
| 119 | struct __attribute__((__packed__)) errInfo { | ||
| 120 | struct async_metaData async_metaData; | ||
| 121 | struct errData errData[]; | ||
| 122 | }; | ||
| 123 | |||
| 124 | #endif | ||
diff --git a/include/linux/vm_err.h b/include/linux/vm_err.h new file mode 100644 index 000000000..e8fcae8b6 --- /dev/null +++ b/include/linux/vm_err.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is licensed under the terms of the GNU General Public | ||
| 5 | * License version 2, as published by the Free Software Foundation, and | ||
| 6 | * may be copied, distributed, and modified under those terms. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 11 | * GNU General Public License for more details. | ||
| 12 | * | ||
| 13 | */ | ||
| 14 | |||
| 15 | #ifndef __VM_ERR_H_ | ||
| 16 | #define __VM_ERR_H_ | ||
| 17 | |||
| 18 | #if IS_ENABLED(CONFIG_TEGRA_VM_ERR_HANDLER) | ||
| 19 | #include <linux/errinfo.h> | ||
| 20 | |||
/*
 * Set of custom error callbacks handed to tegra_hv_register_vm_err_hooks().
 *
 * Every callback receives a read-only error record and returns:
 *   true  - the error needs the kernel to enter bad mode and reboot,
 *   false - the error doesn't need a reboot.
 *
 * NOTE(review): judging by the names, fn_self_async/fn_self_sync handle
 * asynchronous/synchronous errors attributed to this guest and fn_peer
 * handles errors attributed to another guest - confirm with the library
 * module.
 */
struct vm_err_handlers {
	bool (*fn_self_async)(const struct errData *err_data);
	bool (*fn_self_sync)(const struct errData *err_data);
	bool (*fn_peer)(const struct errData *err_data);
};
| 29 | |||
/*
 * Configuration filled in by tegra_hv_get_config().
 * NOTE(review): field meanings below are inferred from the names - confirm.
 */
struct tegra_hv_config {
	unsigned int guest_id_self;	/* presumably the ID of this guest */
	unsigned int num_guests;	/* presumably the total number of guests */
};
| 34 | |||
| 35 | static const char * const fault_reason_desc[] = { | ||
| 36 | "Undefined", | ||
| 37 | "SMMU CB", | ||
| 38 | "SMMU Global", | ||
| 39 | "Bridge", | ||
| 40 | "Memory Controller", | ||
| 41 | "Instruction Abort", | ||
| 42 | "Data Abort", | ||
| 43 | "Other synchronous exception", | ||
| 44 | }; | ||
| 45 | |||
/*
 * Register the custom error callbacks in @custom_handlers.
 * Returns an int status; the stub used when CONFIG_TEGRA_VM_ERR_HANDLER is
 * disabled returns -EINVAL.
 * NOTE(review): success is presumably reported as 0 - confirm with the
 * implementation.
 */
int tegra_hv_register_vm_err_hooks(struct vm_err_handlers *custom_handlers);

/*
 * Fill the caller-provided @config.
 * NOTE(review): presumably reports this guest's ID and the number of
 * guests - confirm with the implementation.
 */
void tegra_hv_get_config(struct tegra_hv_config *config);
| 48 | |||
| 49 | #else | ||
/*
 * struct vm_err_handlers is only defined when CONFIG_TEGRA_VM_ERR_HANDLER
 * is enabled. Declare the tag here so the stub's prototype does not
 * introduce a struct type scoped to its own parameter list.
 */
struct vm_err_handlers;

/*
 * Stub used when CONFIG_TEGRA_VM_ERR_HANDLER is disabled: nothing is
 * registered, a hint is logged and -EINVAL is returned.
 */
static inline int tegra_hv_register_vm_err_hooks(
				struct vm_err_handlers *custom_handlers)
{
	/* Terminate the message with a newline so the log line is complete. */
	pr_err("Can you please enable CONFIG_TEGRA_VM_ERR_HANDLER?\n");
	return -EINVAL;
}
| 56 | #endif | ||
| 57 | |||
| 58 | #endif /* __VM_ERR_H_ */ | ||
