summaryrefslogtreecommitdiffstats
path: root/drivers/virt/tegra/vm_err.c
diff options
context:
space:
mode:
authorYashomati <ygodbole@nvidia.com>2019-05-31 21:59:52 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2019-12-24 14:56:43 -0500
commit87dc30edda5936afa82b0afa821c8be2e44343c5 (patch)
treee1f61e27e96e88880626426db82dbe21c85e6053 /drivers/virt/tegra/vm_err.c
parentcda3f78dc40d0f21b1108a4087b6198fb53bde02 (diff)
inject-vm-err: handlers for injected errors
If Linux/EBP causes an error that HV can't handle, then instead of freezing the guest, HV injects the error back into the guest. This enables the guest to handle the error as gracefully as it can/needs. This changeset provides 2 parts: 1. sample handlers: minimal placeholder handlers that just dump the error information on to the console. This is to be used as a reference for any customized elaborate error handling that may be needed. 2. library module: it comes into existence only if/when any error handler is registered. Its main responsibilities: - map memory that's shared with HV where HV dumps all information about the errors. - register handlers for interrupts used by HV to inject errors - invoke custom error handlers when HV injects error JIRA ESV-312 Bug 2580803 Change-Id: Ia8c6484d423fd33cabbfd901f0f6ebb0da95cb40 Signed-off-by: Yashomati <ygodbole@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2214402 Reviewed-on: https://git-master.nvidia.com/r/2128765 GVS: Gerrit_Virtual_Submit Reviewed-by: Dmitry Pervushin <dpervushin@nvidia.com> Reviewed-by: Hardik T Shah <hardikts@nvidia.com> Reviewed-by: Rohit Upadhyay <rupadhyay@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/virt/tegra/vm_err.c')
-rw-r--r--drivers/virt/tegra/vm_err.c535
1 files changed, 535 insertions, 0 deletions
diff --git a/drivers/virt/tegra/vm_err.c b/drivers/virt/tegra/vm_err.c
new file mode 100644
index 000000000..d9f11248c
--- /dev/null
+++ b/drivers/virt/tegra/vm_err.c
@@ -0,0 +1,535 @@
1/*
2 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#define pr_fmt(fmt) "vm-err: " fmt
14
15#include <linux/interrupt.h>
16#include <linux/of_irq.h>
17#include <linux/platform_device.h>
18#include <linux/vm_err.h>
19#include <asm/traps.h>
20#include <asm-generic/irq_regs.h>
21#include <asm/system_misc.h>
22#include <soc/tegra/virt/syscalls.h>
23#include <soc/tegra/chip-id.h>
24
/* Per-device state for HV-injected VM error handling */
struct tegra_hv_err_ctrl {
	struct device *dev;			/* bound platform device */
	struct errInfo *err_info;		/* mapped HV-shared error memory */
	unsigned int async_err_arr_items;	/* slots in the async error ring */
	int hv_peer_err_irq_id;			/* raw SPI id from HV; -1 = none */
	unsigned int vcpu_cnt;			/* number of VCPUs in this VM */
	struct serr_hook hook;			/* sync (SError) hook registration */
	struct vm_err_handlers handlers;	/* custom handlers, installed lazily */
};
34
/* HV configuration (own guest id, number of guests) cached at probe time */
static struct tegra_hv_config config;

/* Backing store for the runtime-synthesized "interrupts" DT property:
 * one cell each for irq type, SPI number and trigger flags.
 */
static unsigned int intr_info[3]; /* intr_property_size = 3 */

static struct property interrupts_prop = {
	.name = "interrupts",
};
42
/*
 * check_sync_err() - consume the synchronous error slot for one VCPU.
 * @vcpu_id:           VCPU the synchronous abort was delivered on.
 * @ctrl:              per-device control state (shared memory, handlers).
 * @send_sync_err_ack: out: whether the caller must ack the error to HV.
 *
 * Return: true when the guest should enter bad mode, false to continue.
 */
static bool check_sync_err(const unsigned int vcpu_id,
		const struct tegra_hv_err_ctrl *const ctrl,
		bool *send_sync_err_ack)
{
	uint64_t rd_idx;
	const struct errData *err_data;

	if (vcpu_id >= ctrl->vcpu_cnt) {
		dev_crit(ctrl->dev, "%s: Invalid vcpu id %u\n", __func__,
			 vcpu_id);
		*send_sync_err_ack = false;
		/* Unexpected vcpu id. Enter bad mode. */
		return true;
	}

	/* Shared memory layout is:
	 * |--async-err-metadata--|--async-errors-array-|--sync-errors-array-|
	 * Size of async errors array = Max errors + 1 (to avoid same empty
	 * and full conditions of the buffer)
	 * Size of sync errors array = 1 error per VCPU * number of VCPUs in VM
	 */
	rd_idx = ctrl->async_err_arr_items + vcpu_id;
	/* It's already validated at init time that sufficient memory is
	 * allocated to hold async_err_arr_items + sync error per vcpu. Hence,
	 * after validating the vcpu_id above, no need to validate rd_idx here.
	 */
	err_data = &(ctrl->err_info->errData[rd_idx]);
	if (!err_data->sync_dataAbort.isFilled) {
		*send_sync_err_ack = false;
		dev_info(ctrl->dev, "No synchronous error data on vcpu %u\n",
			 vcpu_id);
		/* No sync error. No need to enter bad mode. */
		return false;
	}

	if (err_data->errType != SYNC) {
		dev_crit(ctrl->dev, "%s: unexpected error Type %d\n",
			 __func__, err_data->errType);
		*send_sync_err_ack = true;
		/* Unexpected error id. Enter bad mode. */
		return true;
	}

	/* A sync abort can only be caused by this guest itself */
	if (err_data->offendingGuestId != config.guest_id_self) {
		dev_crit(ctrl->dev, "%s: invalid offender id %u\n", __func__,
			 err_data->offendingGuestId);
		*send_sync_err_ack = true;
		/* Invalid id of offending guest. Enter bad mode. */
		return true;
	}
	dev_err(ctrl->dev, "Synchronous error on vcpu %u\n", vcpu_id);

	if (ctrl->handlers.fn_self_sync) {
		*send_sync_err_ack = true;
		/* Enter bad_mode (or otherwise) as custom handler dictates */
		return ctrl->handlers.fn_self_sync(err_data);
	}

	/* should never reach here
	 * NOTE(review): this path IS reachable when a client registered only
	 * async handlers (the serr hook is installed regardless) — confirm
	 * whether bad mode is the intended outcome in that case.
	 */
	*send_sync_err_ack = true;
	/* Reaching here is unexpected. Enter bad mode. */
	return true;
}
106
/*
 * async_err_handler() - vIRQ handler for HV-injected asynchronous errors.
 * @irq:     Linux irq number (unused).
 * @context: the struct tegra_hv_err_ctrl passed to devm_request_irq().
 *
 * Drains the async error ring shared with HV, dispatches each record to
 * the self/peer custom handler, acks the consumed records to HV, and
 * enters bad mode if any handler (or a failed ack) demands it.
 */
static irqreturn_t async_err_handler(int irq, void *context)
{
	unsigned int num_async_errs_read = 0;
	bool enter_bad_mode = false;
	const struct tegra_hv_err_ctrl *const ctrl = context;
	const unsigned int vcpu_id = hyp_read_vcpu_id();
	uint64_t local_rd_idx, next_rd_idx;
	const struct errData *err_data;
	bool (*fn_self_async)(const struct errData *const err_data);
	bool (*fn_peer)(const struct errData *const err_data);
	bool (*handler)(const struct errData *const err_data);
	struct pt_regs *regs;

	if (vcpu_id != 0) {
		dev_err(ctrl->dev, "Asynchronous error on vcpu %u\n", vcpu_id);
		/* Only VCPU0 is expected to receive async error vIRQ */
		return IRQ_HANDLED;
	}

	fn_self_async = ctrl->handlers.fn_self_async;
	fn_peer = ctrl->handlers.fn_peer;

	/* Nothing to dispatch to; drop the notification */
	if ((fn_self_async == NULL) && (fn_peer == NULL)) {
		dev_err(ctrl->dev, "Asynchronous error handlers absent\n");
		return IRQ_HANDLED;
	}

	local_rd_idx = ctrl->err_info->async_metaData.rdIdx;
	dev_dbg(ctrl->dev, "Local Rd Idx = %llu, shared Wr Idx = %llu\n",
		local_rd_idx, ctrl->err_info->async_metaData.wrIdx);

	/* Check async error. Read until error queue gets empty.
	 * rdIdx names the last consumed slot, so the next unread record
	 * lives one slot past it — consistent with the "max errors + 1"
	 * ring sizing described in check_sync_err()/shared_mem_map().
	 */
	while (local_rd_idx != ctrl->err_info->async_metaData.wrIdx) {
		next_rd_idx = (local_rd_idx + 1) % ctrl->async_err_arr_items;

		err_data = &(ctrl->err_info->errData[next_rd_idx]);
		if (err_data->offendingGuestId == config.guest_id_self)
			handler = fn_self_async;
		else
			handler = fn_peer;

		/* Any single handler requesting bad mode is sticky */
		if (handler) {
			if (handler(err_data) == true)
				enter_bad_mode = true;
		}

		local_rd_idx = next_rd_idx;
		num_async_errs_read++;
		dev_dbg(ctrl->dev, "Local Rd Idx = %llu\n", local_rd_idx);
	}

	if (num_async_errs_read) {
		dev_err(ctrl->dev, "%u asynchronous error(s) read\n",
			num_async_errs_read);

		/* Send ack for async error(s) to HV */
		if (hyp_send_async_err_ack(local_rd_idx) != 0) {
			dev_crit(ctrl->dev,
				 "%s: Sending ack failed. Setting bad mode\n",
				 __func__);
			/* Unexpected */
			enter_bad_mode = true;
		}
	}

	/* die() may return (e.g. oops=panic not set); panic() is the
	 * guaranteed stop.
	 */
	if (enter_bad_mode) {
		regs = get_irq_regs();
		die("Oops - bad mode", regs, 0);
		panic("bad mode");
	}

	return IRQ_HANDLED;
}
180
/*
 * sync_err_handler() - serr_hook callback for synchronous aborts.
 * @regs:    fault-time registers (unused here).
 * @reason:  abort reason code (unused here).
 * @esr:     exception syndrome register value (unused here).
 * @context: the struct tegra_hv_err_ctrl installed via hook.priv.
 *
 * Return: 0 tells the caller to enter bad mode, non-zero to continue —
 * note the inverted sense relative to the local enter_bad_mode flag.
 */
static int sync_err_handler(struct pt_regs *regs, int reason,
		uint32_t esr, void *context)
{
	bool enter_bad_mode = false;
	bool send_sync_err_ack = false;
	const struct tegra_hv_err_ctrl *const ctrl = context;
	const unsigned int vcpu_id = hyp_read_vcpu_id();

	/* Check sync error */
	if (check_sync_err(vcpu_id, ctrl, &send_sync_err_ack) == true)
		enter_bad_mode = true;

	/* Send ack for error to HV.
	 * NOTE(review): the ack hypercall receives the bool flag itself as
	 * its argument — confirm the expected parameter against the HV API.
	 */
	if (send_sync_err_ack) {
		if (hyp_send_sync_err_ack(send_sync_err_ack) != 0) {
			dev_crit(ctrl->dev,
				 "%s: Sending ack failed. Setting bad mode\n",
				 __func__);
			/* Unexpected */
			enter_bad_mode = true;
		}
	}

	/* Caller expects 0 to enter bad mode */
	return (!enter_bad_mode);
}
207
208void tegra_hv_get_config(struct tegra_hv_config *cfg)
209{
210 cfg->guest_id_self = config.guest_id_self;
211 cfg->num_guests = config.num_guests;
212}
213EXPORT_SYMBOL(tegra_hv_get_config);
214
215static int virq_handler_init(const struct platform_device *pdev)
216{
217 int ret;
218 struct irq_data *peer_err_irq_data;
219 int lin_peer_err_irq_id;
220 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
221 struct device dev = pdev->dev;
222
223 dev_info(ctrl->dev, "Error notification HV IRQ id: %d\n",
224 ctrl->hv_peer_err_irq_id);
225
226 /* Ensure HV returned valid irq */
227 if (ctrl->hv_peer_err_irq_id == -1)
228 return 0;
229
230 /* Set indicate irq type 0 to indicate Shared Peripheral Irq */
231 intr_info[0] = cpu_to_be32(0);
232 /* Id in SPI namespace - subtract number of PPIs
233 * (Private Peripheral Irqs) which is = 32
234 */
235 intr_info[1] = cpu_to_be32(ctrl->hv_peer_err_irq_id - 32);
236 /* Trigger irq on low-to-high edge (0x1) */
237 intr_info[2] = cpu_to_be32(IRQF_TRIGGER_RISING);
238
239 interrupts_prop.length = sizeof(intr_info);
240 dev_info(ctrl->dev, "interrupts_prop.length %u\n",
241 interrupts_prop.length);
242
243 interrupts_prop.value = intr_info;
244
245 if (of_add_property(dev.of_node, &interrupts_prop)) {
246 dev_err(ctrl->dev, "%s: failed to add interrupts property\n",
247 __func__);
248 return -EACCES;
249 }
250
251 lin_peer_err_irq_id = of_irq_get(dev.of_node, 0);
252 if (lin_peer_err_irq_id < 0) {
253 dev_err(ctrl->dev, "%s: Unable to get Linux irq for id %d\n",
254 __func__, ctrl->hv_peer_err_irq_id);
255 return lin_peer_err_irq_id;
256 }
257
258 peer_err_irq_data = irq_get_irq_data(lin_peer_err_irq_id);
259 if (peer_err_irq_data == NULL) {
260 dev_err(ctrl->dev, "%s: Failed to get data for Linux irq %d\n",
261 __func__, lin_peer_err_irq_id);
262 return -ENODEV;
263 }
264
265 ret = devm_request_irq(&dev, lin_peer_err_irq_id, async_err_handler,
266 IRQ_NOTHREAD, dev_name(&dev), ctrl);
267 if (ret < 0) {
268 dev_err(ctrl->dev,
269 "%s: failed to register IRQ %d, Err %d, %s\n",
270 __func__, lin_peer_err_irq_id, ret, pdev->name);
271 return ret;
272 }
273 dev_info(ctrl->dev, "Registered Linux IRQ %d for peer notification\n",
274 lin_peer_err_irq_id);
275
276 return 0;
277}
278
279static int serr_handler_init(struct platform_device *pdev)
280{
281 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
282
283 ctrl->hook.fn = sync_err_handler;
284 ctrl->hook.priv = platform_get_drvdata(pdev);
285 register_serr_hook(&ctrl->hook);
286
287 return 0;
288}
289
/*
 * shared_mem_map() - query and map the HV-shared error-info memory.
 * @pdev: bound platform device whose drvdata holds the ctrl.
 *
 * Reads the IPA/size of the shared buffer plus ring/VCPU geometry via
 * hypercall, validates that the buffer can hold the async ring and one
 * sync slot per VCPU, then maps it into the kernel.
 *
 * Return: 0 on success, -ENODEV when the hypercall is unimplemented,
 * -ENOMEM on bad geometry or mapping failure.
 */
static int shared_mem_map(struct platform_device *pdev)
{
	uint64_t ipa, buff_size, required_size;
	int ret;
	struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);

	/* Get error info details */
	ret = hyp_read_err_info_get(&ipa, &buff_size,
			&ctrl->async_err_arr_items, &ctrl->hv_peer_err_irq_id,
			&ctrl->vcpu_cnt);
	if (ret != 0) {
		/* It could come here if DTS and defconfig enable execution
		 * of this code, but HV hasn't implemented the hypercall.
		 * Flag error.
		 */
		dev_err(ctrl->dev,
			"%s: failed to get err memory address. Err %d\n",
			__func__, ret);
		return -ENODEV;
	}

	if ((ipa == 0) || (buff_size == 0) ||
			(ctrl->async_err_arr_items == 0)) {
		/* It could come here if DTS and defconfig enable execution
		 * of this code, but PCT hasn't enabled error injection.
		 * A warning should suffice.
		 */
		dev_warn(ctrl->dev, "%s: invalid shared memory parameters\n",
			 __func__);
		return -ENOMEM;
	}

	/* Shared memory layout is:
	 * |--async-err-metadata--|--async-errors-array-|--sync-errors-array-|
	 * Size of async errors array = Max errors + 1 (to avoid same empty and
	 * full conditions of the buffer)
	 * Size of sync errors array = 1 error per VCPU * number of VCPUs on
	 * a VM
	 */
	required_size = sizeof(struct async_metaData) +
		(sizeof(struct errData) *
		(ctrl->async_err_arr_items + ctrl->vcpu_cnt));
	if (buff_size < required_size) {
		dev_err(ctrl->dev,
			"%s:invalid params. size %llu. required size %llu\n",
			__func__, buff_size, required_size);
		dev_err(ctrl->dev, "%s: async arr size %u. vcpus %u\n",
			__func__, ctrl->async_err_arr_items, ctrl->vcpu_cnt);
		return -ENOMEM;
	}

	dev_info(ctrl->dev, "%s: Err info IPA for guest %u: 0x%llx\n",
		 __func__, config.guest_id_self, ipa);
	dev_info(ctrl->dev, "Err info buf size 0x%llX\n", buff_size);
	dev_info(ctrl->dev, "Async err arr size %u. Number of VCPUs %u\n",
		 ctrl->async_err_arr_items, ctrl->vcpu_cnt);

	/* Map shared memory.
	 * NOTE(review): mapped cached — assumes HV keeps this IPA range
	 * coherent with the guest's view; confirm against the HV contract.
	 */
	ctrl->err_info = (struct errInfo *) ioremap_cache(ipa, buff_size);
	if (ctrl->err_info == NULL)
		return -ENOMEM;

	return 0;
}
354
355static int hyp_config_init(struct device *dev)
356{
357 int ret = hyp_read_gid(&config.guest_id_self);
358
359 if (ret != 0) {
360 dev_err(dev, "%s: failed to read guest id. Err %d\n",
361 __func__, ret);
362 return ret;
363 }
364
365 ret = hyp_read_nguests(&config.num_guests);
366 if (ret != 0) {
367 /* Only privileged guest can query number of guests */
368 dev_warn(dev, "%s: can't read number of guests. Err %d\n",
369 __func__, ret);
370 }
371
372 dev_info(dev, "%s: guest id %u num guests %u\n", __func__,
373 config.guest_id_self, config.num_guests);
374
375 return 0;
376}
377
378static void shared_structs_check(struct device *dev)
379{
380 /* Ensure coherency with common header */
381 BUILD_BUG_ON(REASON_ENUM_SIZE != (ARRAY_SIZE(fault_reason_desc)));
382
383 /* Manually compare these sizes with HV console dump to ensure
384 * common structures shared by HV and Linux are in sync
385 */
386 dev_info(dev, "async_metaData size 0x%lx\n",
387 sizeof(struct async_metaData));
388 dev_info(dev, "async_bridgeErr size 0x%lx\n",
389 sizeof(struct async_bridgeErr));
390 dev_info(dev, "async_smmuErr size 0x%lx\n",
391 sizeof(struct async_smmuErr));
392 dev_info(dev, "async_mcErr size 0x%lx\n",
393 sizeof(struct async_mcErr));
394 dev_info(dev, "sync_dataAbort size 0x%lx\n",
395 sizeof(struct sync_dataAbort));
396 dev_info(dev, "errData size 0x%lx\n", sizeof(struct errData));
397}
398
399static int vm_err_handler_init(struct platform_device *pdev)
400{
401 int ret;
402 struct tegra_hv_err_ctrl *ctrl;
403 struct device *dev = &pdev->dev;
404
405 if (!is_tegra_hypervisor_mode()) {
406 dev_err(dev, "%s: hypervisor is not present\n", __func__);
407 return -ENODEV;
408 }
409
410 shared_structs_check(dev);
411
412 ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL);
413 if (!ctrl)
414 return -ENOMEM;
415
416 ctrl->dev = dev;
417 platform_set_drvdata(pdev, ctrl);
418
419 ret = hyp_config_init(dev);
420 if (ret)
421 return ret;
422
423 ret = shared_mem_map(pdev);
424 if (ret)
425 return -ENOMEM;
426
427 ret = serr_handler_init(pdev);
428 if (ret)
429 return ret;
430
431 ret = virq_handler_init(pdev);
432 if (ret)
433 return ret;
434
435 return 0;
436}
437
438static int vm_err_handler_remove(struct platform_device *pdev)
439{
440 struct tegra_hv_err_ctrl *ctrl = platform_get_drvdata(pdev);
441 struct device_node *node = pdev->dev.of_node;
442
443 if (of_remove_property(node,
444 of_find_property(node, "interrupts", NULL))) {
445 dev_err(ctrl->dev, "%s: failed to add interrupts property\n",
446 __func__);
447 return -EACCES;
448 }
449
450 unregister_serr_hook(&ctrl->hook);
451 iounmap(ctrl->err_info);
452
453 dev_info(ctrl->dev, "%s: cleaned up and unregistered handler\n",
454 __func__);
455
456 return 0;
457}
458
/* DT match table: a "nvidia,tegra-hv-err" node enables this driver */
static const struct of_device_id tegra_hv_err_match[] = {
	{ .compatible = "nvidia,tegra-hv-err", .data = NULL},
	{},
};
463
/* Registered lazily from tegra_hv_register_vm_err_hooks(), not at boot */
static struct platform_driver tegra_hv_err_pdriver = {
	.driver = {
		.name = "tegra-hv-err-handler",
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(tegra_hv_err_match),
	},
	.probe = vm_err_handler_init,
	.remove = vm_err_handler_remove,
};
473
474static int tegra_hv_register_hooks_for_device(struct device *dev,
475 void *handlers)
476{
477 struct tegra_hv_err_ctrl *ctrl;
478 const struct platform_device *pd = container_of(dev,
479 struct platform_device, dev);
480 const struct vm_err_handlers *_handlers =
481 (struct vm_err_handlers *) handlers;
482
483 ctrl = platform_get_drvdata(pd);
484 if (!ctrl) {
485 dev_err(dev, "%s: no platform data", __func__);
486 return 0;
487 }
488
489 if (ctrl->handlers.fn_self_async == NULL)
490 ctrl->handlers.fn_self_async = _handlers->fn_self_async;
491
492 if (ctrl->handlers.fn_self_sync == NULL)
493 ctrl->handlers.fn_self_sync = _handlers->fn_self_sync;
494
495 if (ctrl->handlers.fn_peer == NULL)
496 ctrl->handlers.fn_peer = _handlers->fn_peer;
497
498 return 0;
499}
500
/*
 * tegra_hv_register_vm_err_hooks() - public API to install VM error
 * handlers; lazily registers the platform driver on first use.
 * @handlers: set of self-sync/self-async/peer callbacks; passing a set
 *            with all three NULL unregisters the driver instead.
 *
 * Return: 0 on success, non-zero on failure.
 * NOTE(review): returns 1 (not a negative errno) for a NULL @handlers —
 * callers checking "ret < 0" will miss it; confirm intended convention.
 */
int tegra_hv_register_vm_err_hooks(struct vm_err_handlers *handlers)
{
	int ret;

	if (!handlers) {
		pr_err("%s: invalid error handlers\n", __func__);
		return 1;
	}

	/* All-NULL handler set means "tear down".
	 * NOTE(review): this unregisters unconditionally, even if the
	 * driver was never registered — confirm callers never do that.
	 */
	if (!handlers->fn_self_async && !handlers->fn_self_sync
			&& !handlers->fn_peer) {
		platform_driver_unregister(&tegra_hv_err_pdriver);
		return 0;
	}

	/* NOTE(review): peeks at the driver core's private ->p pointer to
	 * infer "already registered" — fragile across kernel versions;
	 * a local "registered" flag would be safer. Confirm.
	 */
	if (!tegra_hv_err_pdriver.driver.p) {
		/* Not registered/bound yet */
		ret = platform_driver_register(&tegra_hv_err_pdriver);
		if (ret) {
			pr_err("%s: failed to register driver. Err %d\n",
			       __func__, ret);
			return ret;
		}
	}

	/* Fan the handlers out to every device bound to this driver */
	ret = driver_for_each_device(&tegra_hv_err_pdriver.driver, NULL,
			handlers, tegra_hv_register_hooks_for_device);
	if (ret) {
		pr_err("%s: failed to attach driver. Err %d\n", __func__, ret);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(tegra_hv_register_vm_err_hooks);